diff --git "a/experiment_config.json" "b/experiment_config.json" new file mode 100644--- /dev/null +++ "b/experiment_config.json" @@ -0,0 +1,333581 @@ +{ + "training_args": { + "output_dir": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters/nlu_boolq_ff_v1", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": true, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 4, + "per_device_eval_batch_size": 8, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 4, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 2e-05, + "weight_decay": 0.0, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3, + "max_steps": -1, + "lr_scheduler_type": "linear", + "lr_scheduler_kwargs": {}, + "warmup_ratio": 0.0, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters/nlu_boolq_ff_v1/runs/Sep10_02-15-40_gx08", + "logging_strategy": "steps", + "logging_first_step": false, + "logging_steps": 20, + "logging_nan_inf_filter": true, + "save_strategy": "epoch", + "save_steps": 500, + "save_total_limit": null, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "seed": 42, + "data_seed": null, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": false, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": [], + "dataloader_drop_last": false, + "eval_steps": 147, + "dataloader_num_workers": 0, + "dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters/nlu_boolq_ff_v1", + "disable_tqdm": false, + "remove_unused_columns": true, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": null, + "greater_is_better": null, + "ignore_data_skip": false, + "fsdp": [], + "fsdp_min_num_params": 0, + "fsdp_config": { + "min_num_params": 0, + "xla": false, + "xla_fsdp_v2": false, + "xla_fsdp_grad_ckpt": false + }, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "split_batches": false, + "dispatch_batches": null, + "even_batches": true, + "use_seedable_sampler": true, + "non_blocking": false, + "gradient_accumulation_kwargs": null + }, + "deepspeed": null, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_token": "", + "hub_private_repo": null, + "hub_always_push": false, + "gradient_checkpointing": false, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": "", + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false + }, + "lora_config": null, + "flops": { + "eval": 105693667713235200, + "train": 53674555878669600, + "total": 159368223591904800 + }, + "total_energy": 126.84330000000001, + "logs": [ + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:16:01.984520", + "step": 0, + "epoch": 0 + }, + { + "type": "pplx", + "content": 54140675.446864516, + "timestamp": "2025-09-10 02:16:01.988837", + "step": 0, + "epoch": 0 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:02.062821", + "step": 0, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.6008338332176208, + "timestamp": "2025-09-10 02:16:02.064796", + "step": 1, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:02.110853", + "step": 1, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.5395371317863464, + "timestamp": "2025-09-10 02:16:02.115148", + "step": 2, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:02.146065", + "step": 2, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.547315239906311, + "timestamp": "2025-09-10 02:16:02.152973", + "step": 3, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:02.197495", + "step": 3, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.6588919758796692, + "timestamp": "2025-09-10 02:16:02.248579", + "step": 4, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:02.280581", + "step": 4, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.2344198077917099, + "timestamp": "2025-09-10 02:16:02.284584", + "step": 5, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:02.335992", + "step": 5, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.18304279446601868, + "timestamp": "2025-09-10 02:16:02.338088", + "step": 6, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:02.368461", + "step": 6, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.1824495494365692, + "timestamp": "2025-09-10 02:16:02.375219", + "step": 7, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:02.418461", + "step": 7, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.21861636638641357, + "timestamp": "2025-09-10 02:16:02.443617", + "step": 8, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:02.500477", + "step": 8, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0901818498969078, + "timestamp": "2025-09-10 02:16:02.503792", + "step": 9, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:02.534851", + "step": 9, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06925918161869049, + "timestamp": "2025-09-10 02:16:02.542382", + "step": 10, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:02.581087", + "step": 10, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06179536134004593, + "timestamp": "2025-09-10 02:16:02.587234", + "step": 11, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:02.625224", + "step": 11, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.060144226998090744, + "timestamp": "2025-09-10 02:16:02.652677", + "step": 12, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:02.700676", + "step": 12, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04763566702604294, + "timestamp": "2025-09-10 02:16:02.704489", + "step": 13, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:02.757375", + "step": 13, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04455741122364998, + "timestamp": "2025-09-10 02:16:02.765877", + "step": 14, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:02.807895", + "step": 14, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03932064399123192, + "timestamp": "2025-09-10 02:16:02.812005", + "step": 15, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:02.847941", + "step": 15, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02747354283928871, + "timestamp": "2025-09-10 02:16:02.874022", + "step": 16, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:02.926383", + "step": 16, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02676134742796421, + "timestamp": "2025-09-10 02:16:02.930699", + "step": 17, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:02.967263", + "step": 17, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036269430071115494, + "timestamp": "2025-09-10 02:16:02.971943", + "step": 18, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:03.013820", + "step": 18, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025049904361367226, + "timestamp": "2025-09-10 02:16:03.023342", + "step": 19, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:03.064093", + "step": 19, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038132019340991974, + "timestamp": "2025-09-10 02:16:03.092030", + "step": 20, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:03.131090", + "step": 20, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02351507358253002, + "timestamp": "2025-09-10 02:16:03.138086", + "step": 21, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:03.174188", + "step": 21, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016791896894574165, + "timestamp": "2025-09-10 02:16:03.177900", + "step": 22, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:03.210824", + "step": 22, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028946993872523308, + "timestamp": "2025-09-10 02:16:03.217309", + "step": 23, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:03.248257", + "step": 23, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030578048899769783, + "timestamp": "2025-09-10 02:16:03.276442", + "step": 24, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:03.307551", + "step": 24, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0187423974275589, + "timestamp": "2025-09-10 02:16:03.311793", + "step": 25, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:03.343360", + "step": 25, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01975913718342781, + "timestamp": "2025-09-10 02:16:03.350089", + "step": 26, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:03.380780", + "step": 26, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0228941161185503, + "timestamp": "2025-09-10 02:16:03.387364", + "step": 27, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:16:03.436367", + "step": 27, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02786724641919136, + "timestamp": "2025-09-10 02:16:03.460600", + "step": 28, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:03.493390", + "step": 28, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.045785628259181976, + "timestamp": "2025-09-10 02:16:03.498028", + "step": 29, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:16:03.533826", + "step": 29, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004484932404011488, + "timestamp": "2025-09-10 02:16:03.546978", + "step": 30, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:03.578360", + "step": 30, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05362967774271965, + "timestamp": "2025-09-10 02:16:03.584874", + "step": 31, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:03.616459", + "step": 31, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06305649876594543, + "timestamp": "2025-09-10 02:16:03.644134", + "step": 32, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:03.677074", + "step": 32, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015654366463422775, + "timestamp": "2025-09-10 02:16:03.679219", + "step": 33, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:03.710464", + "step": 33, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02429381012916565, + "timestamp": "2025-09-10 02:16:03.717986", + "step": 34, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:03.749170", + "step": 34, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04219824820756912, + "timestamp": "2025-09-10 02:16:03.758869", + "step": 35, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:16:03.789600", + "step": 35, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05330771207809448, + "timestamp": "2025-09-10 02:16:03.813089", + "step": 36, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:03.843661", + "step": 36, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013570256531238556, + "timestamp": "2025-09-10 02:16:03.848084", + "step": 37, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:16:03.893622", + "step": 37, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01802876405417919, + "timestamp": "2025-09-10 02:16:03.895915", + "step": 38, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:03.928023", + "step": 38, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02937530353665352, + "timestamp": "2025-09-10 02:16:03.932421", + "step": 39, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:03.962845", + "step": 39, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03161190077662468, + "timestamp": "2025-09-10 02:16:03.990473", + "step": 40, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:04.022146", + "step": 40, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03022809512913227, + "timestamp": "2025-09-10 02:16:04.026833", + "step": 41, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:04.058668", + "step": 41, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027601536363363266, + "timestamp": "2025-09-10 02:16:04.062673", + "step": 42, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:04.097743", + "step": 42, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021324804052710533, + "timestamp": "2025-09-10 02:16:04.111039", + "step": 43, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:04.143441", + "step": 43, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020267976447939873, + "timestamp": "2025-09-10 02:16:04.171221", + "step": 44, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:04.202635", + "step": 44, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02561989612877369, + "timestamp": "2025-09-10 02:16:04.204754", + "step": 45, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:04.235907", + "step": 45, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025528931990265846, + "timestamp": "2025-09-10 02:16:04.242862", + "step": 46, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:04.273959", + "step": 46, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021888835355639458, + "timestamp": "2025-09-10 02:16:04.283691", + "step": 47, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:04.315341", + "step": 47, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027732163667678833, + "timestamp": "2025-09-10 02:16:04.342901", + "step": 48, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:04.379523", + "step": 48, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03233006224036217, + "timestamp": "2025-09-10 02:16:04.383378", + "step": 49, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:04.414652", + "step": 49, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021881645545363426, + "timestamp": "2025-09-10 02:16:04.425174", + "step": 50, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:16:04.455872", + "step": 50, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03416941687464714, + "timestamp": "2025-09-10 02:16:04.458081", + "step": 51, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 928 + ], + "flops": 27527278844800 + }, + "timestamp": "2025-09-10 02:16:04.632897", + "step": 51, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027697524055838585, + "timestamp": "2025-09-10 02:16:04.656929", + "step": 52, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:04.694123", + "step": 52, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032309334725141525, + "timestamp": "2025-09-10 02:16:04.696565", + "step": 53, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:04.732462", + "step": 53, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014240605756640434, + "timestamp": "2025-09-10 02:16:04.739274", + "step": 54, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:04.773962", + "step": 54, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03165564686059952, + "timestamp": "2025-09-10 02:16:04.779367", + "step": 55, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:16:04.829001", + "step": 55, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028052538633346558, + "timestamp": "2025-09-10 02:16:04.861010", + "step": 56, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:16:04.896301", + "step": 56, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02410052716732025, + "timestamp": "2025-09-10 02:16:04.898695", + "step": 57, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:04.932687", + "step": 57, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028389716520905495, + "timestamp": "2025-09-10 02:16:04.938045", + "step": 58, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:04.970808", + "step": 58, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02223231829702854, + "timestamp": "2025-09-10 02:16:04.979603", + "step": 59, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:05.012882", + "step": 59, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025019675493240356, + "timestamp": "2025-09-10 02:16:05.040525", + "step": 60, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:05.075959", + "step": 60, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02373148687183857, + "timestamp": "2025-09-10 02:16:05.083328", + "step": 61, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:05.118418", + "step": 61, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0228324793279171, + "timestamp": "2025-09-10 02:16:05.123087", + "step": 62, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:05.159154", + "step": 62, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02589366026222706, + "timestamp": "2025-09-10 02:16:05.165199", + "step": 63, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:05.196884", + "step": 63, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02020171843469143, + "timestamp": "2025-09-10 02:16:05.224420", + "step": 64, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:16:05.262460", + "step": 64, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019750652834773064, + "timestamp": "2025-09-10 02:16:05.277816", + "step": 65, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:05.309484", + "step": 65, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022475482895970345, + "timestamp": "2025-09-10 02:16:05.316076", + "step": 66, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:05.349346", + "step": 66, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031203726306557655, + "timestamp": "2025-09-10 02:16:05.351899", + "step": 67, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:05.383549", + "step": 67, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026068750768899918, + "timestamp": "2025-09-10 02:16:05.408325", + "step": 68, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:05.441620", + "step": 68, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020249370485544205, + "timestamp": "2025-09-10 02:16:05.445702", + "step": 69, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:16:05.485622", + "step": 69, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02251577563583851, + "timestamp": "2025-09-10 02:16:05.501212", + "step": 70, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:05.533112", + "step": 70, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02423388697206974, + "timestamp": "2025-09-10 02:16:05.540249", + "step": 71, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:05.580456", + "step": 71, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016923097893595695, + "timestamp": "2025-09-10 02:16:05.605965", + "step": 72, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:05.641491", + "step": 72, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029709434136748314, + "timestamp": "2025-09-10 02:16:05.644029", + "step": 73, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:05.677693", + "step": 73, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013269062153995037, + "timestamp": "2025-09-10 02:16:05.683412", + "step": 74, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:05.716854", + "step": 74, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03226935863494873, + "timestamp": "2025-09-10 02:16:05.725697", + "step": 75, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:16:05.765814", + "step": 75, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020103048533201218, + "timestamp": "2025-09-10 02:16:05.802277", + "step": 76, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:05.833640", + "step": 76, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018879475072026253, + "timestamp": "2025-09-10 02:16:05.837910", + "step": 77, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:05.869054", + "step": 77, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038980383425951004, + "timestamp": "2025-09-10 02:16:05.872959", + "step": 78, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:05.906349", + "step": 78, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029591679573059082, + "timestamp": "2025-09-10 02:16:05.919579", + "step": 79, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:16:05.958106", + "step": 79, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014890284277498722, + "timestamp": "2025-09-10 02:16:05.994447", + "step": 80, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:06.026426", + "step": 80, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009303289465606213, + "timestamp": "2025-09-10 02:16:06.028566", + "step": 81, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:06.059374", + "step": 81, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02382597140967846, + "timestamp": "2025-09-10 02:16:06.071305", + "step": 82, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:06.103494", + "step": 82, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009986629709601402, + "timestamp": "2025-09-10 02:16:06.113405", + "step": 83, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:06.146437", + "step": 83, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022146521136164665, + "timestamp": "2025-09-10 02:16:06.171621", + "step": 84, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:06.203414", + "step": 84, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03081861138343811, + "timestamp": "2025-09-10 02:16:06.207612", + "step": 85, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:16:06.241382", + "step": 85, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.042918942868709564, + "timestamp": "2025-09-10 02:16:06.254938", + "step": 86, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:06.286234", + "step": 86, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01463954895734787, + "timestamp": "2025-09-10 02:16:06.290359", + "step": 87, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:06.321404", + "step": 87, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025244222953915596, + "timestamp": "2025-09-10 02:16:06.349501", + "step": 88, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 14712978242368 + }, + "timestamp": "2025-09-10 02:16:06.393581", + "step": 88, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018959475681185722, + "timestamp": "2025-09-10 02:16:06.410750", + "step": 89, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:06.442741", + "step": 89, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019290607422590256, + "timestamp": "2025-09-10 02:16:06.449555", + "step": 90, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:06.480195", + "step": 90, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037016745656728745, + "timestamp": "2025-09-10 02:16:06.486836", + "step": 91, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:06.520237", + "step": 91, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01576540246605873, + "timestamp": "2025-09-10 02:16:06.554391", + "step": 92, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:06.586174", + "step": 92, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029999637976288795, + "timestamp": "2025-09-10 02:16:06.588193", + "step": 93, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:06.618830", + "step": 93, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007616397459059954, + "timestamp": "2025-09-10 02:16:06.625809", + "step": 94, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:16:06.672877", + "step": 94, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041450273245573044, + "timestamp": "2025-09-10 02:16:06.687746", + "step": 95, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:06.719554", + "step": 95, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028501790016889572, + "timestamp": "2025-09-10 02:16:06.746921", + "step": 96, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:06.780163", + "step": 96, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031911808997392654, + "timestamp": "2025-09-10 02:16:06.785472", + "step": 97, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:06.816090", + "step": 97, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010332711972296238, + "timestamp": "2025-09-10 02:16:06.820432", + "step": 98, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:06.850962", + "step": 98, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014578322879970074, + "timestamp": "2025-09-10 02:16:06.855363", + "step": 99, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:06.886347", + "step": 99, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030410753563046455, + "timestamp": "2025-09-10 02:16:06.914188", + "step": 100, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:16:06.945693", + "step": 100, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019498659297823906, + "timestamp": "2025-09-10 02:16:06.948062", + "step": 101, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:06.981389", + "step": 101, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02664143405854702, + "timestamp": "2025-09-10 02:16:06.988136", + "step": 102, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:07.018987", + "step": 102, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0366455540060997, + "timestamp": "2025-09-10 02:16:07.026481", + "step": 103, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:07.057910", + "step": 103, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021227413788437843, + "timestamp": "2025-09-10 02:16:07.090878", + "step": 104, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:07.121568", + "step": 104, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016079608350992203, + "timestamp": "2025-09-10 02:16:07.130132", + "step": 105, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:07.161498", + "step": 105, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01837443746626377, + "timestamp": "2025-09-10 02:16:07.168957", + "step": 106, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:07.199456", + "step": 106, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029402051120996475, + "timestamp": "2025-09-10 02:16:07.206169", + "step": 107, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:07.237182", + "step": 107, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021664408966898918, + "timestamp": "2025-09-10 02:16:07.265754", + "step": 108, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:07.295601", + "step": 108, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01921442337334156, + "timestamp": "2025-09-10 02:16:07.303172", + "step": 109, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:07.333506", + "step": 109, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016907794401049614, + "timestamp": "2025-09-10 02:16:07.340549", + "step": 110, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:07.372342", + "step": 110, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018889309838414192, + "timestamp": "2025-09-10 02:16:07.384880", + "step": 111, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:07.416259", + "step": 111, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014976476319134235, + "timestamp": "2025-09-10 02:16:07.444498", + "step": 112, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:07.476485", + "step": 112, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020062191411852837, + "timestamp": "2025-09-10 02:16:07.481612", + "step": 113, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:07.511720", + "step": 113, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023766087368130684, + "timestamp": "2025-09-10 02:16:07.518727", + "step": 114, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:07.549994", + "step": 114, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021067747846245766, + "timestamp": "2025-09-10 02:16:07.557452", + "step": 115, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:07.589333", + "step": 115, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020303750410676003, + "timestamp": "2025-09-10 02:16:07.617297", + "step": 116, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:07.649366", + "step": 116, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02873547188937664, + "timestamp": "2025-09-10 02:16:07.661942", + "step": 117, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:07.692247", + "step": 117, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021697448566555977, + "timestamp": "2025-09-10 02:16:07.699045", + "step": 118, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:07.730118", + "step": 118, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02336110547184944, + "timestamp": "2025-09-10 02:16:07.742698", + "step": 119, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:07.777761", + "step": 119, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02601124718785286, + "timestamp": "2025-09-10 02:16:07.810365", + "step": 120, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:07.841544", + "step": 120, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019523756578564644, + "timestamp": "2025-09-10 02:16:07.851022", + "step": 121, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:07.881927", + "step": 121, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019188281148672104, + "timestamp": "2025-09-10 02:16:07.891951", + "step": 122, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:07.922749", + "step": 122, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020111748948693275, + "timestamp": "2025-09-10 02:16:07.929577", + "step": 123, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:07.961525", + "step": 123, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02041424997150898, + "timestamp": "2025-09-10 02:16:07.993208", + "step": 124, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:16:08.023902", + "step": 124, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03184106573462486, + "timestamp": "2025-09-10 02:16:08.026634", + "step": 125, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:08.056664", + "step": 125, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02083834446966648, + "timestamp": "2025-09-10 02:16:08.066958", + "step": 126, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:16:08.102647", + "step": 126, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025340793654322624, + "timestamp": "2025-09-10 02:16:08.116615", + "step": 127, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:16:08.148390", + "step": 127, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021747667342424393, + "timestamp": "2025-09-10 02:16:08.171962", + "step": 128, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:08.203715", + "step": 128, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025707753375172615, + "timestamp": "2025-09-10 02:16:08.213867", + "step": 129, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:16:08.245075", + "step": 129, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01971745304763317, + "timestamp": "2025-09-10 02:16:08.247284", + "step": 130, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:08.279079", + "step": 130, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02148953266441822, + "timestamp": "2025-09-10 02:16:08.286537", + "step": 131, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:08.317714", + "step": 131, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02072383277118206, + "timestamp": "2025-09-10 02:16:08.342913", + "step": 132, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:08.374235", + "step": 132, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021854082122445107, + "timestamp": "2025-09-10 02:16:08.379220", + "step": 133, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:08.411433", + "step": 133, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016318751499056816, + "timestamp": "2025-09-10 02:16:08.419281", + "step": 134, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:08.449943", + "step": 134, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02498047612607479, + "timestamp": "2025-09-10 02:16:08.462062", + "step": 135, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:08.493203", + "step": 135, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02256803587079048, + "timestamp": "2025-09-10 02:16:08.521197", + "step": 136, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:08.551083", + "step": 136, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017909932881593704, + "timestamp": "2025-09-10 02:16:08.560671", + "step": 137, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:08.591595", + "step": 137, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019151031970977783, + "timestamp": "2025-09-10 02:16:08.598213", + "step": 138, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:08.627700", + "step": 138, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01293948758393526, + "timestamp": "2025-09-10 02:16:08.634698", + "step": 139, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:08.664634", + "step": 139, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014859228394925594, + "timestamp": "2025-09-10 02:16:08.693246", + "step": 140, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:16:08.725823", + "step": 140, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030040746554732323, + "timestamp": "2025-09-10 02:16:08.738785", + "step": 141, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:08.769782", + "step": 141, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015265722759068012, + "timestamp": "2025-09-10 02:16:08.777294", + "step": 142, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:08.807493", + "step": 142, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02200271561741829, + "timestamp": "2025-09-10 02:16:08.814902", + "step": 143, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:08.845397", + "step": 143, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023353280499577522, + "timestamp": "2025-09-10 02:16:08.873964", + "step": 144, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:08.904724", + "step": 144, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01525102648884058, + "timestamp": "2025-09-10 02:16:08.912397", + "step": 145, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:08.942660", + "step": 145, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02498231828212738, + "timestamp": "2025-09-10 02:16:08.949473", + "step": 146, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:08.979736", + "step": 146, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0227807704359293, + "timestamp": "2025-09-10 02:16:08.989868", + "step": 147, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:16:20.735353", + "step": 147, + "epoch": 1 + }, + { + "type": "pplx", + "content": 12191892.104022551, + "timestamp": "2025-09-10 02:16:20.742172", + "step": 147, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:20.777302", + "step": 147, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01853315904736519, + "timestamp": "2025-09-10 02:16:20.804672", + "step": 148, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:20.847582", + "step": 148, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01748022995889187, + "timestamp": "2025-09-10 02:16:20.852109", + "step": 149, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:20.892051", + "step": 149, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016961688175797462, + "timestamp": "2025-09-10 02:16:20.898764", + "step": 150, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:16:20.955270", + "step": 150, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03245147690176964, + "timestamp": "2025-09-10 02:16:20.968525", + "step": 151, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:21.030461", + "step": 151, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023213069885969162, + "timestamp": "2025-09-10 02:16:21.061308", + "step": 152, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:16:21.113790", + "step": 152, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02709483541548252, + "timestamp": "2025-09-10 02:16:21.117104", + "step": 153, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:21.150904", + "step": 153, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02957731857895851, + "timestamp": "2025-09-10 02:16:21.157774", + "step": 154, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:21.192385", + "step": 154, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0102442791685462, + "timestamp": "2025-09-10 02:16:21.204856", + "step": 155, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:21.237494", + "step": 155, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026265621185302734, + "timestamp": "2025-09-10 02:16:21.262502", + "step": 156, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:16:21.295340", + "step": 156, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022335294634103775, + "timestamp": "2025-09-10 02:16:21.298538", + "step": 157, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:21.332105", + "step": 157, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011588959954679012, + "timestamp": "2025-09-10 02:16:21.344331", + "step": 158, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:21.377620", + "step": 158, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019390691071748734, + "timestamp": "2025-09-10 02:16:21.390111", + "step": 159, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:21.421280", + "step": 159, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02968760021030903, + "timestamp": "2025-09-10 02:16:21.449406", + "step": 160, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:21.481678", + "step": 160, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03552708774805069, + "timestamp": "2025-09-10 02:16:21.484727", + "step": 161, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:21.517175", + "step": 161, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014869497157633305, + "timestamp": "2025-09-10 02:16:21.522775", + "step": 162, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:21.556750", + "step": 162, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022128138691186905, + "timestamp": "2025-09-10 02:16:21.563452", + "step": 163, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:21.595358", + "step": 163, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014087623916566372, + "timestamp": "2025-09-10 02:16:21.623091", + "step": 164, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:21.655243", + "step": 164, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010876579210162163, + "timestamp": "2025-09-10 02:16:21.659393", + "step": 165, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:21.692113", + "step": 165, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02489648386836052, + "timestamp": "2025-09-10 02:16:21.701867", + "step": 166, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:21.734164", + "step": 166, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02895858697593212, + "timestamp": "2025-09-10 02:16:21.745496", + "step": 167, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:16:21.780855", + "step": 167, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02555564045906067, + "timestamp": "2025-09-10 02:16:21.815551", + "step": 168, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:21.847095", + "step": 168, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02748963236808777, + "timestamp": "2025-09-10 02:16:21.859757", + "step": 169, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:21.891258", + "step": 169, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013958572410047054, + "timestamp": "2025-09-10 02:16:21.894827", + "step": 170, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:21.925187", + "step": 170, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02482200227677822, + "timestamp": "2025-09-10 02:16:21.929660", + "step": 171, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:21.960629", + "step": 171, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012857136316597462, + "timestamp": "2025-09-10 02:16:21.994081", + "step": 172, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:16:22.026787", + "step": 172, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013918432407081127, + "timestamp": "2025-09-10 02:16:22.039823", + "step": 173, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:22.069812", + "step": 173, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020305844023823738, + "timestamp": "2025-09-10 02:16:22.073912", + "step": 174, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:22.106095", + "step": 174, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01923571154475212, + "timestamp": "2025-09-10 02:16:22.112980", + "step": 175, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:22.145283", + "step": 175, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02769598178565502, + "timestamp": "2025-09-10 02:16:22.177975", + "step": 176, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:22.210470", + "step": 176, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01644458808004856, + "timestamp": "2025-09-10 02:16:22.214646", + "step": 177, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 18035204324480 + }, + "timestamp": "2025-09-10 02:16:22.265692", + "step": 177, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02010306902229786, + "timestamp": "2025-09-10 02:16:22.287206", + "step": 178, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:22.318980", + "step": 178, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020176881924271584, + "timestamp": "2025-09-10 02:16:22.331133", + "step": 179, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:22.361522", + "step": 179, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016681145876646042, + "timestamp": "2025-09-10 02:16:22.386733", + "step": 180, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:22.417832", + "step": 180, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021172260865569115, + "timestamp": "2025-09-10 02:16:22.420092", + "step": 181, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:16:22.453254", + "step": 181, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028826581314206123, + "timestamp": "2025-09-10 02:16:22.466939", + "step": 182, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:16:22.501100", + "step": 182, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023097632452845573, + "timestamp": "2025-09-10 02:16:22.515058", + "step": 183, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:22.545590", + "step": 183, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027354659512639046, + "timestamp": "2025-09-10 02:16:22.570952", + "step": 184, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:22.600953", + "step": 184, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022371714934706688, + "timestamp": "2025-09-10 02:16:22.605532", + "step": 185, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:22.637703", + "step": 185, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02118653617799282, + "timestamp": "2025-09-10 02:16:22.644663", + "step": 186, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:22.675209", + "step": 186, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005522268824279308, + "timestamp": "2025-09-10 02:16:22.679864", + "step": 187, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:22.710239", + "step": 187, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017090152949094772, + "timestamp": "2025-09-10 02:16:22.741196", + "step": 188, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:22.770746", + "step": 188, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02689528279006481, + "timestamp": "2025-09-10 02:16:22.775360", + "step": 189, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:22.807716", + "step": 189, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017292974516749382, + "timestamp": "2025-09-10 02:16:22.815250", + "step": 190, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:22.845775", + "step": 190, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019936595112085342, + "timestamp": "2025-09-10 02:16:22.852588", + "step": 191, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:16:22.886413", + "step": 191, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01834060624241829, + "timestamp": "2025-09-10 02:16:22.921163", + "step": 192, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:22.952122", + "step": 192, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009056499227881432, + "timestamp": "2025-09-10 02:16:22.957463", + "step": 193, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:22.987267", + "step": 193, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02178066037595272, + "timestamp": "2025-09-10 02:16:22.991642", + "step": 194, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:23.024415", + "step": 194, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023802533745765686, + "timestamp": "2025-09-10 02:16:23.030527", + "step": 195, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:23.062627", + "step": 195, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02088129334151745, + "timestamp": "2025-09-10 02:16:23.091433", + "step": 196, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:23.125721", + "step": 196, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03935558721423149, + "timestamp": "2025-09-10 02:16:23.133504", + "step": 197, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:23.169617", + "step": 197, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02025543339550495, + "timestamp": "2025-09-10 02:16:23.176598", + "step": 198, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:23.208914", + "step": 198, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012568363919854164, + "timestamp": "2025-09-10 02:16:23.216126", + "step": 199, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:23.246371", + "step": 199, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013729465194046497, + "timestamp": "2025-09-10 02:16:23.278223", + "step": 200, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:23.309443", + "step": 200, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01759318821132183, + "timestamp": "2025-09-10 02:16:23.313924", + "step": 201, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:23.344145", + "step": 201, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007888035848736763, + "timestamp": "2025-09-10 02:16:23.351604", + "step": 202, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:23.382940", + "step": 202, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00965458806604147, + "timestamp": "2025-09-10 02:16:23.390314", + "step": 203, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:23.423895", + "step": 203, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01958434283733368, + "timestamp": "2025-09-10 02:16:23.452560", + "step": 204, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:23.481703", + "step": 204, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007743260823190212, + "timestamp": "2025-09-10 02:16:23.487143", + "step": 205, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:23.517951", + "step": 205, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004702796693891287, + "timestamp": "2025-09-10 02:16:23.525371", + "step": 206, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:23.555186", + "step": 206, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01660262979567051, + "timestamp": "2025-09-10 02:16:23.562112", + "step": 207, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:23.593403", + "step": 207, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01599551923573017, + "timestamp": "2025-09-10 02:16:23.626330", + "step": 208, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:23.658169", + "step": 208, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013740134425461292, + "timestamp": "2025-09-10 02:16:23.662707", + "step": 209, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:23.693156", + "step": 209, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017071815207600594, + "timestamp": "2025-09-10 02:16:23.696982", + "step": 210, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:23.731153", + "step": 210, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021906418725848198, + "timestamp": "2025-09-10 02:16:23.738722", + "step": 211, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:23.775191", + "step": 211, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008046785369515419, + "timestamp": "2025-09-10 02:16:23.800487", + "step": 212, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:23.830266", + "step": 212, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006279070395976305, + "timestamp": "2025-09-10 02:16:23.832367", + "step": 213, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:23.862918", + "step": 213, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025564759969711304, + "timestamp": "2025-09-10 02:16:23.866636", + "step": 214, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:23.898046", + "step": 214, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01694483682513237, + "timestamp": "2025-09-10 02:16:23.905447", + "step": 215, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:23.938422", + "step": 215, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028261274099349976, + "timestamp": "2025-09-10 02:16:23.966060", + "step": 216, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:23.995793", + "step": 216, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008908641524612904, + "timestamp": "2025-09-10 02:16:23.997994", + "step": 217, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:24.033911", + "step": 217, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049135930836200714, + "timestamp": "2025-09-10 02:16:24.047306", + "step": 218, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:24.077407", + "step": 218, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009086393751204014, + "timestamp": "2025-09-10 02:16:24.085796", + "step": 219, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:24.121929", + "step": 219, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01568550243973732, + "timestamp": "2025-09-10 02:16:24.155378", + "step": 220, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:16:24.187632", + "step": 220, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011114334687590599, + "timestamp": "2025-09-10 02:16:24.200647", + "step": 221, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:24.237581", + "step": 221, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02597637288272381, + "timestamp": "2025-09-10 02:16:24.244807", + "step": 222, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:24.278536", + "step": 222, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011223288252949715, + "timestamp": "2025-09-10 02:16:24.285951", + "step": 223, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:24.317555", + "step": 223, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010914224199950695, + "timestamp": "2025-09-10 02:16:24.345436", + "step": 224, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 15662185694400 + }, + "timestamp": "2025-09-10 02:16:24.397785", + "step": 224, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009774766862392426, + "timestamp": "2025-09-10 02:16:24.413366", + "step": 225, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:24.445516", + "step": 225, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03243091329932213, + "timestamp": "2025-09-10 02:16:24.456124", + "step": 226, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:24.487958", + "step": 226, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005026062484830618, + "timestamp": "2025-09-10 02:16:24.498899", + "step": 227, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:24.531723", + "step": 227, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017378708347678185, + "timestamp": "2025-09-10 02:16:24.557445", + "step": 228, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:24.588636", + "step": 228, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030252641066908836, + "timestamp": "2025-09-10 02:16:24.601298", + "step": 229, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:24.633811", + "step": 229, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02279387228190899, + "timestamp": "2025-09-10 02:16:24.646266", + "step": 230, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:16:24.683908", + "step": 230, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008336501196026802, + "timestamp": "2025-09-10 02:16:24.699519", + "step": 231, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:24.730452", + "step": 231, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016707872971892357, + "timestamp": "2025-09-10 02:16:24.761385", + "step": 232, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:24.794054", + "step": 232, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023161133751273155, + "timestamp": "2025-09-10 02:16:24.803510", + "step": 233, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:24.839446", + "step": 233, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011448432691395283, + "timestamp": "2025-09-10 02:16:24.851561", + "step": 234, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:24.887091", + "step": 234, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034271273761987686, + "timestamp": "2025-09-10 02:16:24.894082", + "step": 235, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:24.924893", + "step": 235, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014886337332427502, + "timestamp": "2025-09-10 02:16:24.952841", + "step": 236, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:24.991322", + "step": 236, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02193574421107769, + "timestamp": "2025-09-10 02:16:25.010432", + "step": 237, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:25.054375", + "step": 237, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009645821526646614, + "timestamp": "2025-09-10 02:16:25.061131", + "step": 238, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:25.100555", + "step": 238, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009037821553647518, + "timestamp": "2025-09-10 02:16:25.107882", + "step": 239, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:16:25.148957", + "step": 239, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014126168563961983, + "timestamp": "2025-09-10 02:16:25.185418", + "step": 240, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:25.218887", + "step": 240, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005452022887766361, + "timestamp": "2025-09-10 02:16:25.224457", + "step": 241, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:25.254336", + "step": 241, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03313310071825981, + "timestamp": "2025-09-10 02:16:25.261497", + "step": 242, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:25.291853", + "step": 242, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03182428702712059, + "timestamp": "2025-09-10 02:16:25.302738", + "step": 243, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:16:25.338675", + "step": 243, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029811818152666092, + "timestamp": "2025-09-10 02:16:25.366539", + "step": 244, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:25.397901", + "step": 244, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03838468715548515, + "timestamp": "2025-09-10 02:16:25.402821", + "step": 245, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:25.438631", + "step": 245, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02467919886112213, + "timestamp": "2025-09-10 02:16:25.445474", + "step": 246, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:25.479124", + "step": 246, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00823969580233097, + "timestamp": "2025-09-10 02:16:25.486640", + "step": 247, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:16:25.528750", + "step": 247, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011138495989143848, + "timestamp": "2025-09-10 02:16:25.565512", + "step": 248, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:25.596292", + "step": 248, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03702753037214279, + "timestamp": "2025-09-10 02:16:25.601440", + "step": 249, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:25.634575", + "step": 249, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007849356159567833, + "timestamp": "2025-09-10 02:16:25.647926", + "step": 250, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:25.678453", + "step": 250, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00440265703946352, + "timestamp": "2025-09-10 02:16:25.685668", + "step": 251, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:25.715578", + "step": 251, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0027811271138489246, + "timestamp": "2025-09-10 02:16:25.749017", + "step": 252, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:25.779626", + "step": 252, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002288882387802005, + "timestamp": "2025-09-10 02:16:25.781615", + "step": 253, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:25.811818", + "step": 253, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02566692791879177, + "timestamp": "2025-09-10 02:16:25.819734", + "step": 254, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:25.851082", + "step": 254, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016085701063275337, + "timestamp": "2025-09-10 02:16:25.857937", + "step": 255, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:25.888181", + "step": 255, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007969672791659832, + "timestamp": "2025-09-10 02:16:25.916936", + "step": 256, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:25.948366", + "step": 256, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02336304821074009, + "timestamp": "2025-09-10 02:16:25.953983", + "step": 257, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:25.986175", + "step": 257, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010644437745213509, + "timestamp": "2025-09-10 02:16:25.993642", + "step": 258, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:26.026399", + "step": 258, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009769896045327187, + "timestamp": "2025-09-10 02:16:26.030862", + "step": 259, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:26.063269", + "step": 259, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015422756783664227, + "timestamp": "2025-09-10 02:16:26.091962", + "step": 260, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:26.123418", + "step": 260, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031315069645643234, + "timestamp": "2025-09-10 02:16:26.136180", + "step": 261, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:26.166267", + "step": 261, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033414822071790695, + "timestamp": "2025-09-10 02:16:26.173215", + "step": 262, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:16:26.206363", + "step": 262, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015460536815226078, + "timestamp": "2025-09-10 02:16:26.208832", + "step": 263, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:26.240930", + "step": 263, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030592020601034164, + "timestamp": "2025-09-10 02:16:26.272768", + "step": 264, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:26.302446", + "step": 264, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01532017719000578, + "timestamp": "2025-09-10 02:16:26.307982", + "step": 265, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:26.341058", + "step": 265, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033243995159864426, + "timestamp": "2025-09-10 02:16:26.354418", + "step": 266, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:26.387090", + "step": 266, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017378225922584534, + "timestamp": "2025-09-10 02:16:26.394754", + "step": 267, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:16:26.424486", + "step": 267, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02742266096174717, + "timestamp": "2025-09-10 02:16:26.448727", + "step": 268, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:26.482635", + "step": 268, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04506625607609749, + "timestamp": "2025-09-10 02:16:26.487290", + "step": 269, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:26.518809", + "step": 269, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013364973478019238, + "timestamp": "2025-09-10 02:16:26.531381", + "step": 270, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:26.562970", + "step": 270, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01263825036585331, + "timestamp": "2025-09-10 02:16:26.569994", + "step": 271, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:26.602400", + "step": 271, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03316551819443703, + "timestamp": "2025-09-10 02:16:26.630937", + "step": 272, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:26.675334", + "step": 272, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019756343215703964, + "timestamp": "2025-09-10 02:16:26.680035", + "step": 273, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:26.715960", + "step": 273, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0236830972135067, + "timestamp": "2025-09-10 02:16:26.723355", + "step": 274, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:26.761266", + "step": 274, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00928380899131298, + "timestamp": "2025-09-10 02:16:26.768864", + "step": 275, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:26.810695", + "step": 275, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014733054675161839, + "timestamp": "2025-09-10 02:16:26.835676", + "step": 276, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:16:26.871465", + "step": 276, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03336886316537857, + "timestamp": "2025-09-10 02:16:26.884467", + "step": 277, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:26.916086", + "step": 277, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008176090195775032, + "timestamp": "2025-09-10 02:16:26.922890", + "step": 278, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:26.953543", + "step": 278, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010884806513786316, + "timestamp": "2025-09-10 02:16:26.960988", + "step": 279, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:26.991008", + "step": 279, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010812760330736637, + "timestamp": "2025-09-10 02:16:27.019416", + "step": 280, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:27.052033", + "step": 280, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018427478149533272, + "timestamp": "2025-09-10 02:16:27.061756", + "step": 281, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:16:27.091729", + "step": 281, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005787822883576155, + "timestamp": "2025-09-10 02:16:27.094494", + "step": 282, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:27.126564", + "step": 282, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026878537610173225, + "timestamp": "2025-09-10 02:16:27.133510", + "step": 283, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:27.163712", + "step": 283, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014809337444603443, + "timestamp": "2025-09-10 02:16:27.191924", + "step": 284, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:16:27.231470", + "step": 284, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0403473936021328, + "timestamp": "2025-09-10 02:16:27.233854", + "step": 285, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:27.264630", + "step": 285, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009624199941754341, + "timestamp": "2025-09-10 02:16:27.271396", + "step": 286, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:27.307634", + "step": 286, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00741335516795516, + "timestamp": "2025-09-10 02:16:27.314627", + "step": 287, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:27.346502", + "step": 287, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018726302310824394, + "timestamp": "2025-09-10 02:16:27.374123", + "step": 288, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:27.404826", + "step": 288, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043964944779872894, + "timestamp": "2025-09-10 02:16:27.412272", + "step": 289, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:27.443455", + "step": 289, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0012115959543734789, + "timestamp": "2025-09-10 02:16:27.450533", + "step": 290, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:27.482885", + "step": 290, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016456475481390953, + "timestamp": "2025-09-10 02:16:27.495010", + "step": 291, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:27.525943", + "step": 291, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0014793974114581943, + "timestamp": "2025-09-10 02:16:27.557732", + "step": 292, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:27.588473", + "step": 292, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01667448878288269, + "timestamp": "2025-09-10 02:16:27.596371", + "step": 293, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:27.627182", + "step": 293, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013031461276113987, + "timestamp": "2025-09-10 02:16:27.633929", + "step": 294, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:16:38.184699", + "step": 294, + "epoch": 1 + }, + { + "type": "pplx", + "content": 15332585.816633547, + "timestamp": "2025-09-10 02:16:38.187323", + "step": 294, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:16:38.219791", + "step": 294, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023444533348083496, + "timestamp": "2025-09-10 02:16:38.233473", + "step": 295, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:16:38.268307", + "step": 295, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008510954678058624, + "timestamp": "2025-09-10 02:16:38.303183", + "step": 296, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:38.335146", + "step": 296, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015145028941333294, + "timestamp": "2025-09-10 02:16:38.339032", + "step": 297, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:38.370675", + "step": 297, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0069735231809318066, + "timestamp": "2025-09-10 02:16:38.374348", + "step": 298, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:16:38.412173", + "step": 298, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013920980505645275, + "timestamp": "2025-09-10 02:16:38.427806", + "step": 299, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:16:38.460108", + "step": 299, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04089200869202614, + "timestamp": "2025-09-10 02:16:38.485588", + "step": 300, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:38.520153", + "step": 300, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013281070627272129, + "timestamp": "2025-09-10 02:16:38.523981", + "step": 301, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:38.558005", + "step": 301, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03492172807455063, + "timestamp": "2025-09-10 02:16:38.567580", + "step": 302, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:38.600149", + "step": 302, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03769215941429138, + "timestamp": "2025-09-10 02:16:38.609626", + "step": 303, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:38.641634", + "step": 303, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010984980501234531, + "timestamp": "2025-09-10 02:16:38.669717", + "step": 304, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:38.699933", + "step": 304, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016611166298389435, + "timestamp": "2025-09-10 02:16:38.707452", + "step": 305, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:38.739406", + "step": 305, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013240032829344273, + "timestamp": "2025-09-10 02:16:38.750082", + "step": 306, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:38.780171", + "step": 306, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01312293391674757, + "timestamp": "2025-09-10 02:16:38.792673", + "step": 307, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:38.823157", + "step": 307, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01570984721183777, + "timestamp": "2025-09-10 02:16:38.851813", + "step": 308, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:38.882826", + "step": 308, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011351371183991432, + "timestamp": "2025-09-10 02:16:38.887319", + "step": 309, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:38.916806", + "step": 309, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006130642257630825, + "timestamp": "2025-09-10 02:16:38.920935", + "step": 310, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:38.952461", + "step": 310, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02042931318283081, + "timestamp": "2025-09-10 02:16:38.959345", + "step": 311, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:16:38.994181", + "step": 311, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008902345784008503, + "timestamp": "2025-09-10 02:16:39.029134", + "step": 312, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:16:39.062647", + "step": 312, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02012813650071621, + "timestamp": "2025-09-10 02:16:39.075585", + "step": 313, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:39.106912", + "step": 313, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022141067311167717, + "timestamp": "2025-09-10 02:16:39.118577", + "step": 314, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:39.151698", + "step": 314, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03623204678297043, + "timestamp": "2025-09-10 02:16:39.154698", + "step": 315, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:39.185998", + "step": 315, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0135353934019804, + "timestamp": "2025-09-10 02:16:39.213392", + "step": 316, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:39.243549", + "step": 316, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028507256880402565, + "timestamp": "2025-09-10 02:16:39.248758", + "step": 317, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:39.279221", + "step": 317, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029799891635775566, + "timestamp": "2025-09-10 02:16:39.290132", + "step": 318, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:39.319964", + "step": 318, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02186533249914646, + "timestamp": "2025-09-10 02:16:39.326712", + "step": 319, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:39.356706", + "step": 319, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014612867496907711, + "timestamp": "2025-09-10 02:16:39.387845", + "step": 320, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:16:39.425403", + "step": 320, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012654599733650684, + "timestamp": "2025-09-10 02:16:39.440561", + "step": 321, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:39.474992", + "step": 321, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021614069119095802, + "timestamp": "2025-09-10 02:16:39.482465", + "step": 322, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:39.513753", + "step": 322, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012967349961400032, + "timestamp": "2025-09-10 02:16:39.526078", + "step": 323, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:39.560156", + "step": 323, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024872979149222374, + "timestamp": "2025-09-10 02:16:39.588086", + "step": 324, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:39.619750", + "step": 324, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026342902332544327, + "timestamp": "2025-09-10 02:16:39.624580", + "step": 325, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:39.654950", + "step": 325, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006380206905305386, + "timestamp": "2025-09-10 02:16:39.659454", + "step": 326, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:39.689823", + "step": 326, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014897564426064491, + "timestamp": "2025-09-10 02:16:39.694202", + "step": 327, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:39.724802", + "step": 327, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02040562406182289, + "timestamp": "2025-09-10 02:16:39.755826", + "step": 328, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:39.787423", + "step": 328, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015893712639808655, + "timestamp": "2025-09-10 02:16:39.789419", + "step": 329, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:39.819626", + "step": 329, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03249195218086243, + "timestamp": "2025-09-10 02:16:39.826538", + "step": 330, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:39.859763", + "step": 330, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025817295536398888, + "timestamp": "2025-09-10 02:16:39.866827", + "step": 331, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:39.897181", + "step": 331, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030507784336805344, + "timestamp": "2025-09-10 02:16:39.928222", + "step": 332, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:39.958589", + "step": 332, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04506632685661316, + "timestamp": "2025-09-10 02:16:39.964162", + "step": 333, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:39.994622", + "step": 333, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009867721237242222, + "timestamp": "2025-09-10 02:16:40.001420", + "step": 334, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:40.031806", + "step": 334, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03285963833332062, + "timestamp": "2025-09-10 02:16:40.038877", + "step": 335, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:40.068180", + "step": 335, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004922616295516491, + "timestamp": "2025-09-10 02:16:40.096072", + "step": 336, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:40.125555", + "step": 336, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011179156601428986, + "timestamp": "2025-09-10 02:16:40.127649", + "step": 337, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:40.157666", + "step": 337, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011313307099044323, + "timestamp": "2025-09-10 02:16:40.161788", + "step": 338, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:40.191735", + "step": 338, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008836266584694386, + "timestamp": "2025-09-10 02:16:40.196410", + "step": 339, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:40.226239", + "step": 339, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021027730777859688, + "timestamp": "2025-09-10 02:16:40.251163", + "step": 340, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:40.282459", + "step": 340, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020776817575097084, + "timestamp": "2025-09-10 02:16:40.289283", + "step": 341, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:16:40.322361", + "step": 341, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013467268086969852, + "timestamp": "2025-09-10 02:16:40.325962", + "step": 342, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:40.358346", + "step": 342, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0017702631885185838, + "timestamp": "2025-09-10 02:16:40.365035", + "step": 343, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:40.395803", + "step": 343, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01885797269642353, + "timestamp": "2025-09-10 02:16:40.424139", + "step": 344, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:40.455425", + "step": 344, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011524977162480354, + "timestamp": "2025-09-10 02:16:40.465019", + "step": 345, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:40.496082", + "step": 345, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020894749090075493, + "timestamp": "2025-09-10 02:16:40.500138", + "step": 346, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:40.533156", + "step": 346, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022029070183634758, + "timestamp": "2025-09-10 02:16:40.543987", + "step": 347, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:40.574403", + "step": 347, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018113840371370316, + "timestamp": "2025-09-10 02:16:40.602678", + "step": 348, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:40.632904", + "step": 348, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030301451683044434, + "timestamp": "2025-09-10 02:16:40.637626", + "step": 349, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:40.667865", + "step": 349, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008944302797317505, + "timestamp": "2025-09-10 02:16:40.674887", + "step": 350, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:40.706075", + "step": 350, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015170658007264137, + "timestamp": "2025-09-10 02:16:40.716939", + "step": 351, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:40.747818", + "step": 351, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004507328849285841, + "timestamp": "2025-09-10 02:16:40.775419", + "step": 352, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:40.805563", + "step": 352, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03259176388382912, + "timestamp": "2025-09-10 02:16:40.810294", + "step": 353, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:40.840532", + "step": 353, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019336406141519547, + "timestamp": "2025-09-10 02:16:40.848235", + "step": 354, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:40.879018", + "step": 354, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009617815725505352, + "timestamp": "2025-09-10 02:16:40.891221", + "step": 355, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:40.924595", + "step": 355, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01103215478360653, + "timestamp": "2025-09-10 02:16:40.958887", + "step": 356, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:40.992068", + "step": 356, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004833351355046034, + "timestamp": "2025-09-10 02:16:41.000528", + "step": 357, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 19459015502528 + }, + "timestamp": "2025-09-10 02:16:41.061147", + "step": 357, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029006347060203552, + "timestamp": "2025-09-10 02:16:41.084566", + "step": 358, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:41.115146", + "step": 358, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0070088389329612255, + "timestamp": "2025-09-10 02:16:41.122685", + "step": 359, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:41.153083", + "step": 359, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005866586230695248, + "timestamp": "2025-09-10 02:16:41.180827", + "step": 360, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:41.211081", + "step": 360, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005960374139249325, + "timestamp": "2025-09-10 02:16:41.216464", + "step": 361, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:41.247340", + "step": 361, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020136630162596703, + "timestamp": "2025-09-10 02:16:41.253990", + "step": 362, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:41.285009", + "step": 362, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019610974937677383, + "timestamp": "2025-09-10 02:16:41.291838", + "step": 363, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:41.322911", + "step": 363, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008913476951420307, + "timestamp": "2025-09-10 02:16:41.356086", + "step": 364, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:41.387239", + "step": 364, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011225526221096516, + "timestamp": "2025-09-10 02:16:41.392356", + "step": 365, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:41.423802", + "step": 365, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006913323421031237, + "timestamp": "2025-09-10 02:16:41.431320", + "step": 366, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:41.461790", + "step": 366, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009624729864299297, + "timestamp": "2025-09-10 02:16:41.469080", + "step": 367, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:41.499416", + "step": 367, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025887373834848404, + "timestamp": "2025-09-10 02:16:41.527240", + "step": 368, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:41.558343", + "step": 368, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008098089136183262, + "timestamp": "2025-09-10 02:16:41.562878", + "step": 369, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:41.593446", + "step": 369, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006331682205200195, + "timestamp": "2025-09-10 02:16:41.600508", + "step": 370, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:41.630146", + "step": 370, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006118200719356537, + "timestamp": "2025-09-10 02:16:41.634316", + "step": 371, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:41.665242", + "step": 371, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008842705748975277, + "timestamp": "2025-09-10 02:16:41.696961", + "step": 372, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:41.727842", + "step": 372, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0035271942615509033, + "timestamp": "2025-09-10 02:16:41.732390", + "step": 373, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:41.762973", + "step": 373, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03405757620930672, + "timestamp": "2025-09-10 02:16:41.770190", + "step": 374, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:41.800748", + "step": 374, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011618994176387787, + "timestamp": "2025-09-10 02:16:41.807526", + "step": 375, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:41.838416", + "step": 375, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02129237912595272, + "timestamp": "2025-09-10 02:16:41.871884", + "step": 376, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 15187581968384 + }, + "timestamp": "2025-09-10 02:16:41.911666", + "step": 376, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010881869122385979, + "timestamp": "2025-09-10 02:16:41.929030", + "step": 377, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:41.962237", + "step": 377, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043674368411302567, + "timestamp": "2025-09-10 02:16:41.969644", + "step": 378, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:42.001605", + "step": 378, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026635179296135902, + "timestamp": "2025-09-10 02:16:42.007745", + "step": 379, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:42.038844", + "step": 379, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020435309037566185, + "timestamp": "2025-09-10 02:16:42.066678", + "step": 380, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:42.096798", + "step": 380, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00813285168260336, + "timestamp": "2025-09-10 02:16:42.101285", + "step": 381, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:42.131500", + "step": 381, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002828313270583749, + "timestamp": "2025-09-10 02:16:42.138351", + "step": 382, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:42.168544", + "step": 382, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009776918217539787, + "timestamp": "2025-09-10 02:16:42.175390", + "step": 383, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:42.205969", + "step": 383, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005225719418376684, + "timestamp": "2025-09-10 02:16:42.237096", + "step": 384, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:42.268769", + "step": 384, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01830691285431385, + "timestamp": "2025-09-10 02:16:42.281411", + "step": 385, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:42.311923", + "step": 385, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03338460996747017, + "timestamp": "2025-09-10 02:16:42.318762", + "step": 386, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:42.352378", + "step": 386, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009636408649384975, + "timestamp": "2025-09-10 02:16:42.356616", + "step": 387, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:16:42.392495", + "step": 387, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011447208002209663, + "timestamp": "2025-09-10 02:16:42.427109", + "step": 388, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:42.458611", + "step": 388, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008088217116892338, + "timestamp": "2025-09-10 02:16:42.463326", + "step": 389, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:42.496371", + "step": 389, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015474558807909489, + "timestamp": "2025-09-10 02:16:42.500585", + "step": 390, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:42.531277", + "step": 390, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02543746307492256, + "timestamp": "2025-09-10 02:16:42.542317", + "step": 391, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:42.573162", + "step": 391, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025561505928635597, + "timestamp": "2025-09-10 02:16:42.601815", + "step": 392, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:42.632716", + "step": 392, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03465661779046059, + "timestamp": "2025-09-10 02:16:42.640158", + "step": 393, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:42.671412", + "step": 393, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01359565556049347, + "timestamp": "2025-09-10 02:16:42.680964", + "step": 394, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:42.711581", + "step": 394, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007907412014901638, + "timestamp": "2025-09-10 02:16:42.718717", + "step": 395, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:42.750176", + "step": 395, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0071455794386565685, + "timestamp": "2025-09-10 02:16:42.783294", + "step": 396, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:42.816573", + "step": 396, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0031619654037058353, + "timestamp": "2025-09-10 02:16:42.820323", + "step": 397, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:42.851829", + "step": 397, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004171676468104124, + "timestamp": "2025-09-10 02:16:42.858972", + "step": 398, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:42.890758", + "step": 398, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029826102778315544, + "timestamp": "2025-09-10 02:16:42.897555", + "step": 399, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:42.928308", + "step": 399, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01927870139479637, + "timestamp": "2025-09-10 02:16:42.953538", + "step": 400, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:42.985120", + "step": 400, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021446945145726204, + "timestamp": "2025-09-10 02:16:42.989056", + "step": 401, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:43.022395", + "step": 401, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007334953639656305, + "timestamp": "2025-09-10 02:16:43.031856", + "step": 402, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:43.064842", + "step": 402, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01580173708498478, + "timestamp": "2025-09-10 02:16:43.076947", + "step": 403, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:43.108486", + "step": 403, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004910886753350496, + "timestamp": "2025-09-10 02:16:43.133400", + "step": 404, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:43.165557", + "step": 404, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016585027799010277, + "timestamp": "2025-09-10 02:16:43.169480", + "step": 405, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:43.201730", + "step": 405, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025732260197401047, + "timestamp": "2025-09-10 02:16:43.208429", + "step": 406, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:43.239305", + "step": 406, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017954887822270393, + "timestamp": "2025-09-10 02:16:43.246796", + "step": 407, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:43.277735", + "step": 407, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004366433713585138, + "timestamp": "2025-09-10 02:16:43.310485", + "step": 408, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:43.342601", + "step": 408, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02379104681313038, + "timestamp": "2025-09-10 02:16:43.352185", + "step": 409, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:43.382988", + "step": 409, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02213932015001774, + "timestamp": "2025-09-10 02:16:43.390277", + "step": 410, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:43.421169", + "step": 410, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03638289123773575, + "timestamp": "2025-09-10 02:16:43.427925", + "step": 411, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:43.459158", + "step": 411, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004115985240787268, + "timestamp": "2025-09-10 02:16:43.486953", + "step": 412, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:43.521004", + "step": 412, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028757499530911446, + "timestamp": "2025-09-10 02:16:43.523691", + "step": 413, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:43.556976", + "step": 413, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02367253229022026, + "timestamp": "2025-09-10 02:16:43.562502", + "step": 414, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:43.595650", + "step": 414, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015732292085886, + "timestamp": "2025-09-10 02:16:43.601340", + "step": 415, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:43.641354", + "step": 415, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007087147329002619, + "timestamp": "2025-09-10 02:16:43.670473", + "step": 416, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:43.710572", + "step": 416, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017834067344665527, + "timestamp": "2025-09-10 02:16:43.716218", + "step": 417, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:43.755943", + "step": 417, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01305320393294096, + "timestamp": "2025-09-10 02:16:43.762054", + "step": 418, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:43.799543", + "step": 418, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0279587022960186, + "timestamp": "2025-09-10 02:16:43.808058", + "step": 419, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:43.841203", + "step": 419, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04414095729589462, + "timestamp": "2025-09-10 02:16:43.871907", + "step": 420, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:43.902653", + "step": 420, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037644851952791214, + "timestamp": "2025-09-10 02:16:43.910969", + "step": 421, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:43.940973", + "step": 421, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019706133753061295, + "timestamp": "2025-09-10 02:16:43.951822", + "step": 422, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:43.983401", + "step": 422, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011109764687716961, + "timestamp": "2025-09-10 02:16:43.993594", + "step": 423, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:44.024238", + "step": 423, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01477269921451807, + "timestamp": "2025-09-10 02:16:44.052407", + "step": 424, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:44.082738", + "step": 424, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005930650979280472, + "timestamp": "2025-09-10 02:16:44.087197", + "step": 425, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:44.116986", + "step": 425, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011241083033382893, + "timestamp": "2025-09-10 02:16:44.124430", + "step": 426, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:44.154303", + "step": 426, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01913139782845974, + "timestamp": "2025-09-10 02:16:44.158484", + "step": 427, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:44.189053", + "step": 427, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013194134458899498, + "timestamp": "2025-09-10 02:16:44.220251", + "step": 428, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:44.250583", + "step": 428, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004168748389929533, + "timestamp": "2025-09-10 02:16:44.259190", + "step": 429, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:44.290390", + "step": 429, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004601773340255022, + "timestamp": "2025-09-10 02:16:44.300694", + "step": 430, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:44.331481", + "step": 430, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011017811484634876, + "timestamp": "2025-09-10 02:16:44.343647", + "step": 431, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:16:44.385862", + "step": 431, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014921742491424084, + "timestamp": "2025-09-10 02:16:44.409528", + "step": 432, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:44.440096", + "step": 432, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03164242208003998, + "timestamp": "2025-09-10 02:16:44.444692", + "step": 433, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:44.475580", + "step": 433, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028035728260874748, + "timestamp": "2025-09-10 02:16:44.482514", + "step": 434, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:44.513112", + "step": 434, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02136605978012085, + "timestamp": "2025-09-10 02:16:44.520589", + "step": 435, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:44.551120", + "step": 435, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007327110972255468, + "timestamp": "2025-09-10 02:16:44.579190", + "step": 436, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:44.609394", + "step": 436, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010495754890143871, + "timestamp": "2025-09-10 02:16:44.619224", + "step": 437, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:44.652859", + "step": 437, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.048316050320863724, + "timestamp": "2025-09-10 02:16:44.666223", + "step": 438, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:44.697552", + "step": 438, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01725853607058525, + "timestamp": "2025-09-10 02:16:44.704625", + "step": 439, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:44.735631", + "step": 439, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018758054822683334, + "timestamp": "2025-09-10 02:16:44.764189", + "step": 440, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:44.795294", + "step": 440, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007198534905910492, + "timestamp": "2025-09-10 02:16:44.800320", + "step": 441, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:16:54.824750", + "step": 441, + "epoch": 1 + }, + { + "type": "pplx", + "content": 15501530.366672913, + "timestamp": "2025-09-10 02:16:54.827696", + "step": 441, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:54.858250", + "step": 441, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01051324512809515, + "timestamp": "2025-09-10 02:16:54.862020", + "step": 442, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:54.895486", + "step": 442, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01905696466565132, + "timestamp": "2025-09-10 02:16:54.899808", + "step": 443, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:54.931097", + "step": 443, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013812151737511158, + "timestamp": "2025-09-10 02:16:54.956332", + "step": 444, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:16:54.998895", + "step": 444, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023278802633285522, + "timestamp": "2025-09-10 02:16:55.012229", + "step": 445, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:55.043104", + "step": 445, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027200039476156235, + "timestamp": "2025-09-10 02:16:55.047443", + "step": 446, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:55.077695", + "step": 446, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022835474461317062, + "timestamp": "2025-09-10 02:16:55.083632", + "step": 447, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:55.115485", + "step": 447, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00839492492377758, + "timestamp": "2025-09-10 02:16:55.149070", + "step": 448, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:55.179000", + "step": 448, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026511041447520256, + "timestamp": "2025-09-10 02:16:55.187400", + "step": 449, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:55.223189", + "step": 449, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015092065557837486, + "timestamp": "2025-09-10 02:16:55.230247", + "step": 450, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:55.259934", + "step": 450, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017918048426508904, + "timestamp": "2025-09-10 02:16:55.267050", + "step": 451, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:55.297124", + "step": 451, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017638269811868668, + "timestamp": "2025-09-10 02:16:55.322545", + "step": 452, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:55.351949", + "step": 452, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012330381199717522, + "timestamp": "2025-09-10 02:16:55.354182", + "step": 453, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:16:55.384704", + "step": 453, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011708649806678295, + "timestamp": "2025-09-10 02:16:55.397277", + "step": 454, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:55.429707", + "step": 454, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02566845901310444, + "timestamp": "2025-09-10 02:16:55.440071", + "step": 455, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:55.470922", + "step": 455, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0025598767679184675, + "timestamp": "2025-09-10 02:16:55.495870", + "step": 456, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:55.531353", + "step": 456, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012102210894227028, + "timestamp": "2025-09-10 02:16:55.537883", + "step": 457, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:55.569169", + "step": 457, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009209878742694855, + "timestamp": "2025-09-10 02:16:55.573634", + "step": 458, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:55.607979", + "step": 458, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009584350511431694, + "timestamp": "2025-09-10 02:16:55.615763", + "step": 459, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:55.651621", + "step": 459, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016596131026744843, + "timestamp": "2025-09-10 02:16:55.679599", + "step": 460, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:55.717580", + "step": 460, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02308899164199829, + "timestamp": "2025-09-10 02:16:55.719799", + "step": 461, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:55.749684", + "step": 461, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016699977219104767, + "timestamp": "2025-09-10 02:16:55.754325", + "step": 462, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:16:55.787359", + "step": 462, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02096674218773842, + "timestamp": "2025-09-10 02:16:55.800723", + "step": 463, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:16:55.831583", + "step": 463, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0141488928347826, + "timestamp": "2025-09-10 02:16:55.864678", + "step": 464, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:55.899549", + "step": 464, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02313445508480072, + "timestamp": "2025-09-10 02:16:55.904295", + "step": 465, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:55.940946", + "step": 465, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005347964819520712, + "timestamp": "2025-09-10 02:16:55.947951", + "step": 466, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:55.982414", + "step": 466, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019470447674393654, + "timestamp": "2025-09-10 02:16:55.989966", + "step": 467, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:16:56.020181", + "step": 467, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01922597922384739, + "timestamp": "2025-09-10 02:16:56.052020", + "step": 468, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:56.082140", + "step": 468, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02592761255800724, + "timestamp": "2025-09-10 02:16:56.089958", + "step": 469, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:56.120311", + "step": 469, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022186074405908585, + "timestamp": "2025-09-10 02:16:56.127526", + "step": 470, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:56.158541", + "step": 470, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022316042333841324, + "timestamp": "2025-09-10 02:16:56.168674", + "step": 471, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:16:56.198880", + "step": 471, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007065699901431799, + "timestamp": "2025-09-10 02:16:56.222178", + "step": 472, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:56.259536", + "step": 472, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016231011599302292, + "timestamp": "2025-09-10 02:16:56.264118", + "step": 473, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:56.294749", + "step": 473, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024839241057634354, + "timestamp": "2025-09-10 02:16:56.302103", + "step": 474, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:56.332452", + "step": 474, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04020370915532112, + "timestamp": "2025-09-10 02:16:56.336544", + "step": 475, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:56.368297", + "step": 475, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017098741605877876, + "timestamp": "2025-09-10 02:16:56.396144", + "step": 476, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:56.429707", + "step": 476, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014849187806248665, + "timestamp": "2025-09-10 02:16:56.436676", + "step": 477, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:56.475817", + "step": 477, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019702225923538208, + "timestamp": "2025-09-10 02:16:56.482832", + "step": 478, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:56.514808", + "step": 478, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027971146628260612, + "timestamp": "2025-09-10 02:16:56.524882", + "step": 479, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:56.556254", + "step": 479, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00662533612921834, + "timestamp": "2025-09-10 02:16:56.581638", + "step": 480, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:16:56.612469", + "step": 480, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017041940242052078, + "timestamp": "2025-09-10 02:16:56.618341", + "step": 481, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:56.650763", + "step": 481, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0072524151764810085, + "timestamp": "2025-09-10 02:16:56.658474", + "step": 482, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:16:56.691076", + "step": 482, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01302304957062006, + "timestamp": "2025-09-10 02:16:56.698078", + "step": 483, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:16:56.733662", + "step": 483, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008182940073311329, + "timestamp": "2025-09-10 02:16:56.764429", + "step": 484, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:56.795179", + "step": 484, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02642636187374592, + "timestamp": "2025-09-10 02:16:56.797355", + "step": 485, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:16:56.836091", + "step": 485, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025703372433781624, + "timestamp": "2025-09-10 02:16:56.851817", + "step": 486, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:56.882364", + "step": 486, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006586894392967224, + "timestamp": "2025-09-10 02:16:56.889256", + "step": 487, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:56.920015", + "step": 487, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03618357703089714, + "timestamp": "2025-09-10 02:16:56.947737", + "step": 488, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:56.980013", + "step": 488, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02346952259540558, + "timestamp": "2025-09-10 02:16:56.985354", + "step": 489, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:16:57.031808", + "step": 489, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02613210491836071, + "timestamp": "2025-09-10 02:16:57.045157", + "step": 490, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:57.074931", + "step": 490, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013658554293215275, + "timestamp": "2025-09-10 02:16:57.079421", + "step": 491, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:16:57.113555", + "step": 491, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023100513964891434, + "timestamp": "2025-09-10 02:16:57.148219", + "step": 492, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:16:57.179933", + "step": 492, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010461096651852131, + "timestamp": "2025-09-10 02:16:57.185393", + "step": 493, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:16:57.216369", + "step": 493, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01992633379995823, + "timestamp": "2025-09-10 02:16:57.220455", + "step": 494, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:16:57.251240", + "step": 494, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008644700050354004, + "timestamp": "2025-09-10 02:16:57.257576", + "step": 495, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:16:57.287936", + "step": 495, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018692122772336006, + "timestamp": "2025-09-10 02:16:57.313229", + "step": 496, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:16:57.343998", + "step": 496, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010719933547079563, + "timestamp": "2025-09-10 02:16:57.346013", + "step": 497, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:16:57.385187", + "step": 497, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014836416579782963, + "timestamp": "2025-09-10 02:16:57.401132", + "step": 498, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:16:57.439967", + "step": 498, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009858435951173306, + "timestamp": "2025-09-10 02:16:57.444529", + "step": 499, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:16:57.484973", + "step": 499, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014942965470254421, + "timestamp": "2025-09-10 02:16:57.510192", + "step": 500, + "epoch": 1 + }, + { + "type": "info", + "content": "Checkpoint saved at step 500", + "timestamp": "2025-09-10 02:17:02.717209", + "step": 500, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:02.750251", + "step": 500, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010459995828568935, + "timestamp": "2025-09-10 02:17:02.754425", + "step": 501, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:02.786235", + "step": 501, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00723966583609581, + "timestamp": "2025-09-10 02:17:02.795255", + "step": 502, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:02.832596", + "step": 502, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023343030363321304, + "timestamp": "2025-09-10 02:17:02.839413", + "step": 503, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:02.871628", + "step": 503, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01857823319733143, + "timestamp": "2025-09-10 02:17:02.902391", + "step": 504, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:17:02.935396", + "step": 504, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017975622788071632, + "timestamp": "2025-09-10 02:17:02.948532", + "step": 505, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:02.994342", + "step": 505, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029632670804858208, + "timestamp": "2025-09-10 02:17:03.002054", + "step": 506, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:03.042887", + "step": 506, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030040541663765907, + "timestamp": "2025-09-10 02:17:03.050694", + "step": 507, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:03.081641", + "step": 507, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010655703954398632, + "timestamp": "2025-09-10 02:17:03.110342", + "step": 508, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:03.141107", + "step": 508, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010932376608252525, + "timestamp": "2025-09-10 02:17:03.150935", + "step": 509, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:03.189097", + "step": 509, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016881374642252922, + "timestamp": "2025-09-10 02:17:03.201651", + "step": 510, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:17:03.238792", + "step": 510, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011083531193435192, + "timestamp": "2025-09-10 02:17:03.252584", + "step": 511, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:03.288534", + "step": 511, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023147176951169968, + "timestamp": "2025-09-10 02:17:03.319590", + "step": 512, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:03.365430", + "step": 512, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03139190003275871, + "timestamp": "2025-09-10 02:17:03.370617", + "step": 513, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:17:03.420989", + "step": 513, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02452153153717518, + "timestamp": "2025-09-10 02:17:03.436882", + "step": 514, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:03.469459", + "step": 514, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012951391749083996, + "timestamp": "2025-09-10 02:17:03.476526", + "step": 515, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:17:03.523716", + "step": 515, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014777913689613342, + "timestamp": "2025-09-10 02:17:03.558303", + "step": 516, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:17:03.591745", + "step": 516, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03574973717331886, + "timestamp": "2025-09-10 02:17:03.604408", + "step": 517, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:03.634689", + "step": 517, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009790212847292423, + "timestamp": "2025-09-10 02:17:03.641426", + "step": 518, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:03.676660", + "step": 518, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02090480551123619, + "timestamp": "2025-09-10 02:17:03.688328", + "step": 519, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:03.725888", + "step": 519, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0349605493247509, + "timestamp": "2025-09-10 02:17:03.750814", + "step": 520, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:03.784064", + "step": 520, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010910294018685818, + "timestamp": "2025-09-10 02:17:03.789203", + "step": 521, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:03.821200", + "step": 521, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013309179805219173, + "timestamp": "2025-09-10 02:17:03.825439", + "step": 522, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:03.856521", + "step": 522, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028638780117034912, + "timestamp": "2025-09-10 02:17:03.868386", + "step": 523, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:03.900181", + "step": 523, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009006711654365063, + "timestamp": "2025-09-10 02:17:03.928643", + "step": 524, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:03.958522", + "step": 524, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02321997843682766, + "timestamp": "2025-09-10 02:17:03.963456", + "step": 525, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:03.995505", + "step": 525, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02315063215792179, + "timestamp": "2025-09-10 02:17:04.007499", + "step": 526, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:04.053767", + "step": 526, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015552300028502941, + "timestamp": "2025-09-10 02:17:04.060843", + "step": 527, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:04.092583", + "step": 527, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029168089851737022, + "timestamp": "2025-09-10 02:17:04.123678", + "step": 528, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:04.157849", + "step": 528, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003137963591143489, + "timestamp": "2025-09-10 02:17:04.163087", + "step": 529, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:04.196961", + "step": 529, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03553525730967522, + "timestamp": "2025-09-10 02:17:04.204759", + "step": 530, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:04.239239", + "step": 530, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022633006796240807, + "timestamp": "2025-09-10 02:17:04.251415", + "step": 531, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:04.282407", + "step": 531, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016931835561990738, + "timestamp": "2025-09-10 02:17:04.311018", + "step": 532, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:17:04.351459", + "step": 532, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010154195129871368, + "timestamp": "2025-09-10 02:17:04.368471", + "step": 533, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:04.402207", + "step": 533, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035087209194898605, + "timestamp": "2025-09-10 02:17:04.409146", + "step": 534, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:04.439763", + "step": 534, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013602891936898232, + "timestamp": "2025-09-10 02:17:04.444187", + "step": 535, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 688 + ], + "flops": 20408222954560 + }, + "timestamp": "2025-09-10 02:17:04.512971", + "step": 535, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010147054679691792, + "timestamp": "2025-09-10 02:17:04.557938", + "step": 536, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:04.588485", + "step": 536, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009458757936954498, + "timestamp": "2025-09-10 02:17:04.598718", + "step": 537, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:04.632882", + "step": 537, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01887761428952217, + "timestamp": "2025-09-10 02:17:04.640519", + "step": 538, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:04.672166", + "step": 538, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013571500778198242, + "timestamp": "2025-09-10 02:17:04.679851", + "step": 539, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:04.710094", + "step": 539, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018864035606384277, + "timestamp": "2025-09-10 02:17:04.734715", + "step": 540, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:04.766172", + "step": 540, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014328965917229652, + "timestamp": "2025-09-10 02:17:04.775922", + "step": 541, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:04.807076", + "step": 541, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007837352342903614, + "timestamp": "2025-09-10 02:17:04.814804", + "step": 542, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:04.848106", + "step": 542, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020064374431967735, + "timestamp": "2025-09-10 02:17:04.855243", + "step": 543, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:17:04.895354", + "step": 543, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027344727888703346, + "timestamp": "2025-09-10 02:17:04.931857", + "step": 544, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:04.962318", + "step": 544, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035422343760728836, + "timestamp": "2025-09-10 02:17:04.969466", + "step": 545, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 560 + ], + "flops": 16611393146432 + }, + "timestamp": "2025-09-10 02:17:05.026681", + "step": 545, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009574404917657375, + "timestamp": "2025-09-10 02:17:05.046057", + "step": 546, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:05.083897", + "step": 546, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012771585024893284, + "timestamp": "2025-09-10 02:17:05.090641", + "step": 547, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:05.123234", + "step": 547, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007596482522785664, + "timestamp": "2025-09-10 02:17:05.154316", + "step": 548, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:05.185184", + "step": 548, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029550552368164062, + "timestamp": "2025-09-10 02:17:05.189804", + "step": 549, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:05.221572", + "step": 549, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005684220232069492, + "timestamp": "2025-09-10 02:17:05.226030", + "step": 550, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:05.259344", + "step": 550, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020296234637498856, + "timestamp": "2025-09-10 02:17:05.271859", + "step": 551, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:05.306189", + "step": 551, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011387856677174568, + "timestamp": "2025-09-10 02:17:05.339356", + "step": 552, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:05.370234", + "step": 552, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005133692175149918, + "timestamp": "2025-09-10 02:17:05.374915", + "step": 553, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:05.405630", + "step": 553, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015191650949418545, + "timestamp": "2025-09-10 02:17:05.417754", + "step": 554, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:05.449245", + "step": 554, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01479465514421463, + "timestamp": "2025-09-10 02:17:05.456377", + "step": 555, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:05.492055", + "step": 555, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00847632810473442, + "timestamp": "2025-09-10 02:17:05.517533", + "step": 556, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:05.558071", + "step": 556, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020174086093902588, + "timestamp": "2025-09-10 02:17:05.560370", + "step": 557, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:17:05.603236", + "step": 557, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008243663236498833, + "timestamp": "2025-09-10 02:17:05.617199", + "step": 558, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:05.650878", + "step": 558, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014073808677494526, + "timestamp": "2025-09-10 02:17:05.655400", + "step": 559, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:05.685836", + "step": 559, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012161211110651493, + "timestamp": "2025-09-10 02:17:05.711317", + "step": 560, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:05.744726", + "step": 560, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0253736712038517, + "timestamp": "2025-09-10 02:17:05.753532", + "step": 561, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:05.787108", + "step": 561, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00579653587192297, + "timestamp": "2025-09-10 02:17:05.799398", + "step": 562, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:17:05.844748", + "step": 562, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0027781727258116007, + "timestamp": "2025-09-10 02:17:05.858553", + "step": 563, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:05.894663", + "step": 563, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.035328906029462814, + "timestamp": "2025-09-10 02:17:05.924916", + "step": 564, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:05.962930", + "step": 564, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00891299732029438, + "timestamp": "2025-09-10 02:17:05.965304", + "step": 565, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:05.998336", + "step": 565, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013710664585232735, + "timestamp": "2025-09-10 02:17:06.008845", + "step": 566, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:06.039916", + "step": 566, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00687979394569993, + "timestamp": "2025-09-10 02:17:06.046706", + "step": 567, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:06.077841", + "step": 567, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009851823560893536, + "timestamp": "2025-09-10 02:17:06.106375", + "step": 568, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:06.137339", + "step": 568, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012755611911416054, + "timestamp": "2025-09-10 02:17:06.143314", + "step": 569, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:06.174764", + "step": 569, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005232820753008127, + "timestamp": "2025-09-10 02:17:06.185075", + "step": 570, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:06.216416", + "step": 570, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025471851229667664, + "timestamp": "2025-09-10 02:17:06.220670", + "step": 571, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:06.261098", + "step": 571, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016110900789499283, + "timestamp": "2025-09-10 02:17:06.289501", + "step": 572, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:17:06.336757", + "step": 572, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018473317846655846, + "timestamp": "2025-09-10 02:17:06.353491", + "step": 573, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:06.385507", + "step": 573, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014140649698674679, + "timestamp": "2025-09-10 02:17:06.398092", + "step": 574, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:06.429189", + "step": 574, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011097794398665428, + "timestamp": "2025-09-10 02:17:06.435977", + "step": 575, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:06.466194", + "step": 575, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01416011806577444, + "timestamp": "2025-09-10 02:17:06.493932", + "step": 576, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:06.545475", + "step": 576, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014164241962134838, + "timestamp": "2025-09-10 02:17:06.553210", + "step": 577, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:17:06.591569", + "step": 577, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04556810483336449, + "timestamp": "2025-09-10 02:17:06.604998", + "step": 578, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:06.636536", + "step": 578, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010820058174431324, + "timestamp": "2025-09-10 02:17:06.639940", + "step": 579, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:06.673852", + "step": 579, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0034028550144284964, + "timestamp": "2025-09-10 02:17:06.704601", + "step": 580, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:06.736412", + "step": 580, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016602005809545517, + "timestamp": "2025-09-10 02:17:06.741634", + "step": 581, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:06.771605", + "step": 581, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05845152586698532, + "timestamp": "2025-09-10 02:17:06.776491", + "step": 582, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:06.808863", + "step": 582, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03493461757898331, + "timestamp": "2025-09-10 02:17:06.814928", + "step": 583, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:06.846547", + "step": 583, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021650012582540512, + "timestamp": "2025-09-10 02:17:06.879990", + "step": 584, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:06.913612", + "step": 584, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0019279540283605456, + "timestamp": "2025-09-10 02:17:06.918165", + "step": 585, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:06.949617", + "step": 585, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01664150133728981, + "timestamp": "2025-09-10 02:17:06.960220", + "step": 586, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:06.992181", + "step": 586, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02184317074716091, + "timestamp": "2025-09-10 02:17:06.998923", + "step": 587, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:07.043515", + "step": 587, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004326535388827324, + "timestamp": "2025-09-10 02:17:07.071694", + "step": 588, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:17:17.297865", + "step": 588, + "epoch": 1 + }, + { + "type": "pplx", + "content": 17933771.412629146, + "timestamp": "2025-09-10 02:17:17.300864", + "step": 588, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 816 + ], + "flops": 24205052762688 + }, + "timestamp": "2025-09-10 02:17:17.369599", + "step": 588, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009856624528765678, + "timestamp": "2025-09-10 02:17:17.397874", + "step": 589, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:17.433590", + "step": 589, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017127353698015213, + "timestamp": "2025-09-10 02:17:17.440647", + "step": 590, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:17.472940", + "step": 590, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011390717700123787, + "timestamp": "2025-09-10 02:17:17.482793", + "step": 591, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:17.514056", + "step": 591, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014447472058236599, + "timestamp": "2025-09-10 02:17:17.546534", + "step": 592, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:17.579394", + "step": 592, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003005419624969363, + "timestamp": "2025-09-10 02:17:17.583121", + "step": 593, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:17.615124", + "step": 593, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00944295059889555, + "timestamp": "2025-09-10 02:17:17.622720", + "step": 594, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:17.660679", + "step": 594, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009788398630917072, + "timestamp": "2025-09-10 02:17:17.665069", + "step": 595, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:17:17.701220", + "step": 595, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012333549559116364, + "timestamp": "2025-09-10 02:17:17.736145", + "step": 596, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:17.769079", + "step": 596, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01467831339687109, + "timestamp": "2025-09-10 02:17:17.773289", + "step": 597, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:17.807430", + "step": 597, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03252703696489334, + "timestamp": "2025-09-10 02:17:17.814593", + "step": 598, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:17.849955", + "step": 598, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012095707468688488, + "timestamp": "2025-09-10 02:17:17.857669", + "step": 599, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:17.890183", + "step": 599, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03608888015151024, + "timestamp": "2025-09-10 02:17:17.918339", + "step": 600, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:17.951914", + "step": 600, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0019412686815485358, + "timestamp": "2025-09-10 02:17:17.956404", + "step": 601, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:17.986749", + "step": 601, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015688760206103325, + "timestamp": "2025-09-10 02:17:17.993894", + "step": 602, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:18.026557", + "step": 602, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013548861257731915, + "timestamp": "2025-09-10 02:17:18.038440", + "step": 603, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:18.070533", + "step": 603, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.045547544956207275, + "timestamp": "2025-09-10 02:17:18.103997", + "step": 604, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:17:18.147664", + "step": 604, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029206562787294388, + "timestamp": "2025-09-10 02:17:18.160822", + "step": 605, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:18.200358", + "step": 605, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0025844343472272158, + "timestamp": "2025-09-10 02:17:18.207248", + "step": 606, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:18.239358", + "step": 606, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008630426600575447, + "timestamp": "2025-09-10 02:17:18.249893", + "step": 607, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:18.283006", + "step": 607, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0020521271508187056, + "timestamp": "2025-09-10 02:17:18.308137", + "step": 608, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:18.341145", + "step": 608, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0035357114393264055, + "timestamp": "2025-09-10 02:17:18.345327", + "step": 609, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:18.377034", + "step": 609, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006719955708831549, + "timestamp": "2025-09-10 02:17:18.384473", + "step": 610, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:18.417171", + "step": 610, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004843573085963726, + "timestamp": "2025-09-10 02:17:18.421468", + "step": 611, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:18.454741", + "step": 611, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023189399391412735, + "timestamp": "2025-09-10 02:17:18.485520", + "step": 612, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:17:18.521681", + "step": 612, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005349505692720413, + "timestamp": "2025-09-10 02:17:18.534378", + "step": 613, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:18.566683", + "step": 613, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0026773291174322367, + "timestamp": "2025-09-10 02:17:18.578531", + "step": 614, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:18.610775", + "step": 614, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02543543465435505, + "timestamp": "2025-09-10 02:17:18.614897", + "step": 615, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:17:18.650542", + "step": 615, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006155446171760559, + "timestamp": "2025-09-10 02:17:18.685120", + "step": 616, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:18.718426", + "step": 616, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012057982385158539, + "timestamp": "2025-09-10 02:17:18.723346", + "step": 617, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:18.757126", + "step": 617, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0031552365981042385, + "timestamp": "2025-09-10 02:17:18.764412", + "step": 618, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:18.796501", + "step": 618, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024341052398085594, + "timestamp": "2025-09-10 02:17:18.804106", + "step": 619, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:18.839158", + "step": 619, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0073067969642579556, + "timestamp": "2025-09-10 02:17:18.869850", + "step": 620, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:18.902496", + "step": 620, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018821122124791145, + "timestamp": "2025-09-10 02:17:18.910508", + "step": 621, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:18.943708", + "step": 621, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0015981352189555764, + "timestamp": "2025-09-10 02:17:18.947909", + "step": 622, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:18.981778", + "step": 622, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010449434630572796, + "timestamp": "2025-09-10 02:17:18.989279", + "step": 623, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:17:19.021145", + "step": 623, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006673470605164766, + "timestamp": "2025-09-10 02:17:19.046217", + "step": 624, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:19.077682", + "step": 624, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027125883847475052, + "timestamp": "2025-09-10 02:17:19.081932", + "step": 625, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:19.111910", + "step": 625, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00828898511826992, + "timestamp": "2025-09-10 02:17:19.115841", + "step": 626, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:19.148853", + "step": 626, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038712245877832174, + "timestamp": "2025-09-10 02:17:19.156549", + "step": 627, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:19.188531", + "step": 627, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012664406560361385, + "timestamp": "2025-09-10 02:17:19.219467", + "step": 628, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:19.251808", + "step": 628, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03356235474348068, + "timestamp": "2025-09-10 02:17:19.256337", + "step": 629, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:19.288914", + "step": 629, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027829742059111595, + "timestamp": "2025-09-10 02:17:19.300893", + "step": 630, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:19.334244", + "step": 630, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009094549342989922, + "timestamp": "2025-09-10 02:17:19.341336", + "step": 631, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:19.373190", + "step": 631, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018705466063693166, + "timestamp": "2025-09-10 02:17:19.401577", + "step": 632, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:19.433749", + "step": 632, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017098234966397285, + "timestamp": "2025-09-10 02:17:19.438572", + "step": 633, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:19.472952", + "step": 633, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.052354682236909866, + "timestamp": "2025-09-10 02:17:19.480464", + "step": 634, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:19.513654", + "step": 634, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.055087942630052567, + "timestamp": "2025-09-10 02:17:19.520295", + "step": 635, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:19.553748", + "step": 635, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005113348830491304, + "timestamp": "2025-09-10 02:17:19.586415", + "step": 636, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:19.618032", + "step": 636, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05106746777892113, + "timestamp": "2025-09-10 02:17:19.622994", + "step": 637, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:19.660180", + "step": 637, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0263382438570261, + "timestamp": "2025-09-10 02:17:19.662427", + "step": 638, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:19.693860", + "step": 638, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007414556574076414, + "timestamp": "2025-09-10 02:17:19.700726", + "step": 639, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:19.733055", + "step": 639, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001413815887644887, + "timestamp": "2025-09-10 02:17:19.766419", + "step": 640, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:19.798671", + "step": 640, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021783774718642235, + "timestamp": "2025-09-10 02:17:19.808756", + "step": 641, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:19.840085", + "step": 641, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027584636583924294, + "timestamp": "2025-09-10 02:17:19.846841", + "step": 642, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:19.879816", + "step": 642, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03216005116701126, + "timestamp": "2025-09-10 02:17:19.887161", + "step": 643, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:19.919658", + "step": 643, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036754488945007324, + "timestamp": "2025-09-10 02:17:19.951670", + "step": 644, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:19.984226", + "step": 644, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009167312644422054, + "timestamp": "2025-09-10 02:17:19.986908", + "step": 645, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:20.020646", + "step": 645, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015368753112852573, + "timestamp": "2025-09-10 02:17:20.027113", + "step": 646, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:20.067572", + "step": 646, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025697126984596252, + "timestamp": "2025-09-10 02:17:20.071649", + "step": 647, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:20.104098", + "step": 647, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0037650710437446833, + "timestamp": "2025-09-10 02:17:20.132598", + "step": 648, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:20.164187", + "step": 648, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010932421311736107, + "timestamp": "2025-09-10 02:17:20.168755", + "step": 649, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:17:20.206320", + "step": 649, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022069621831178665, + "timestamp": "2025-09-10 02:17:20.221952", + "step": 650, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:20.253942", + "step": 650, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0029460687655955553, + "timestamp": "2025-09-10 02:17:20.261051", + "step": 651, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:20.291356", + "step": 651, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01202553603798151, + "timestamp": "2025-09-10 02:17:20.319792", + "step": 652, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:17:20.357378", + "step": 652, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0178877804428339, + "timestamp": "2025-09-10 02:17:20.370475", + "step": 653, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:20.400728", + "step": 653, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02879807911813259, + "timestamp": "2025-09-10 02:17:20.407758", + "step": 654, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:20.439357", + "step": 654, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014536075294017792, + "timestamp": "2025-09-10 02:17:20.449146", + "step": 655, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:20.479745", + "step": 655, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005087182391434908, + "timestamp": "2025-09-10 02:17:20.512393", + "step": 656, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:20.542746", + "step": 656, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021268010139465332, + "timestamp": "2025-09-10 02:17:20.552947", + "step": 657, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:20.584319", + "step": 657, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03790181875228882, + "timestamp": "2025-09-10 02:17:20.591566", + "step": 658, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:20.623381", + "step": 658, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01340021938085556, + "timestamp": "2025-09-10 02:17:20.630847", + "step": 659, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:20.660721", + "step": 659, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04001796990633011, + "timestamp": "2025-09-10 02:17:20.689358", + "step": 660, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:20.720306", + "step": 660, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014666594564914703, + "timestamp": "2025-09-10 02:17:20.724796", + "step": 661, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:20.755743", + "step": 661, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015082466416060925, + "timestamp": "2025-09-10 02:17:20.763048", + "step": 662, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:20.793259", + "step": 662, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02892708219587803, + "timestamp": "2025-09-10 02:17:20.800964", + "step": 663, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:20.831799", + "step": 663, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015628747642040253, + "timestamp": "2025-09-10 02:17:20.864736", + "step": 664, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:20.896146", + "step": 664, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00712405052036047, + "timestamp": "2025-09-10 02:17:20.900645", + "step": 665, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:20.930963", + "step": 665, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026593917980790138, + "timestamp": "2025-09-10 02:17:20.937907", + "step": 666, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:20.968190", + "step": 666, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024430980905890465, + "timestamp": "2025-09-10 02:17:20.979065", + "step": 667, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:21.009583", + "step": 667, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00685026403516531, + "timestamp": "2025-09-10 02:17:21.034329", + "step": 668, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:21.064108", + "step": 668, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017591923475265503, + "timestamp": "2025-09-10 02:17:21.068756", + "step": 669, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:21.100801", + "step": 669, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037963759154081345, + "timestamp": "2025-09-10 02:17:21.108507", + "step": 670, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:21.139238", + "step": 670, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010039789602160454, + "timestamp": "2025-09-10 02:17:21.146744", + "step": 671, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:21.177482", + "step": 671, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009369760751724243, + "timestamp": "2025-09-10 02:17:21.208546", + "step": 672, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:21.239684", + "step": 672, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021921101957559586, + "timestamp": "2025-09-10 02:17:21.244528", + "step": 673, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:17:21.283405", + "step": 673, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031845226883888245, + "timestamp": "2025-09-10 02:17:21.299593", + "step": 674, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:21.330395", + "step": 674, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018058914691209793, + "timestamp": "2025-09-10 02:17:21.337228", + "step": 675, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:21.367685", + "step": 675, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014186178334057331, + "timestamp": "2025-09-10 02:17:21.392579", + "step": 676, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:21.422596", + "step": 676, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009794117882847786, + "timestamp": "2025-09-10 02:17:21.427139", + "step": 677, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:21.461444", + "step": 677, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02925429679453373, + "timestamp": "2025-09-10 02:17:21.468343", + "step": 678, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:21.498941", + "step": 678, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006639067083597183, + "timestamp": "2025-09-10 02:17:21.506197", + "step": 679, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:17:21.539972", + "step": 679, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012162303552031517, + "timestamp": "2025-09-10 02:17:21.574487", + "step": 680, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:21.606403", + "step": 680, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01577383652329445, + "timestamp": "2025-09-10 02:17:21.608542", + "step": 681, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:21.638652", + "step": 681, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01001597661525011, + "timestamp": "2025-09-10 02:17:21.645573", + "step": 682, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:21.676435", + "step": 682, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027138683944940567, + "timestamp": "2025-09-10 02:17:21.684229", + "step": 683, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:21.714391", + "step": 683, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01829609088599682, + "timestamp": "2025-09-10 02:17:21.742189", + "step": 684, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:17:21.778582", + "step": 684, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020318562164902687, + "timestamp": "2025-09-10 02:17:21.793784", + "step": 685, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:21.824562", + "step": 685, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025962335988879204, + "timestamp": "2025-09-10 02:17:21.832278", + "step": 686, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:21.864754", + "step": 686, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022130966186523438, + "timestamp": "2025-09-10 02:17:21.871795", + "step": 687, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:21.908649", + "step": 687, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020423393696546555, + "timestamp": "2025-09-10 02:17:21.936467", + "step": 688, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:21.969398", + "step": 688, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025996601209044456, + "timestamp": "2025-09-10 02:17:21.974116", + "step": 689, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:22.008827", + "step": 689, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013769307173788548, + "timestamp": "2025-09-10 02:17:22.021168", + "step": 690, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:22.051835", + "step": 690, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01184395607560873, + "timestamp": "2025-09-10 02:17:22.058527", + "step": 691, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:22.089604", + "step": 691, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013138137757778168, + "timestamp": "2025-09-10 02:17:22.117290", + "step": 692, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:22.148020", + "step": 692, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02101938985288143, + "timestamp": "2025-09-10 02:17:22.152625", + "step": 693, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:22.182555", + "step": 693, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008636675775051117, + "timestamp": "2025-09-10 02:17:22.190279", + "step": 694, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:22.220932", + "step": 694, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014400548301637173, + "timestamp": "2025-09-10 02:17:22.228369", + "step": 695, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:17:22.266988", + "step": 695, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02432694099843502, + "timestamp": "2025-09-10 02:17:22.304035", + "step": 696, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:22.334547", + "step": 696, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00980927050113678, + "timestamp": "2025-09-10 02:17:22.342975", + "step": 697, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:22.373782", + "step": 697, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011573218740522861, + "timestamp": "2025-09-10 02:17:22.381161", + "step": 698, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:22.411688", + "step": 698, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02407762221992016, + "timestamp": "2025-09-10 02:17:22.419006", + "step": 699, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:22.448918", + "step": 699, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02633582428097725, + "timestamp": "2025-09-10 02:17:22.476479", + "step": 700, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:22.506687", + "step": 700, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011304855346679688, + "timestamp": "2025-09-10 02:17:22.517108", + "step": 701, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:22.547634", + "step": 701, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019669000059366226, + "timestamp": "2025-09-10 02:17:22.554496", + "step": 702, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:22.585771", + "step": 702, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024419734254479408, + "timestamp": "2025-09-10 02:17:22.596278", + "step": 703, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:22.627396", + "step": 703, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023848844692111015, + "timestamp": "2025-09-10 02:17:22.655573", + "step": 704, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:22.686399", + "step": 704, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010360152460634708, + "timestamp": "2025-09-10 02:17:22.696638", + "step": 705, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:22.727958", + "step": 705, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011346792802214622, + "timestamp": "2025-09-10 02:17:22.731910", + "step": 706, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:22.762430", + "step": 706, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013497546315193176, + "timestamp": "2025-09-10 02:17:22.774546", + "step": 707, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:17:22.807696", + "step": 707, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015288002789020538, + "timestamp": "2025-09-10 02:17:22.841930", + "step": 708, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:22.872858", + "step": 708, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013141672126948833, + "timestamp": "2025-09-10 02:17:22.877838", + "step": 709, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:22.907856", + "step": 709, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020733583718538284, + "timestamp": "2025-09-10 02:17:22.911893", + "step": 710, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:17:22.944525", + "step": 710, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01969611644744873, + "timestamp": "2025-09-10 02:17:22.957824", + "step": 711, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:17:22.989433", + "step": 711, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018604954704642296, + "timestamp": "2025-09-10 02:17:23.012821", + "step": 712, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:23.043016", + "step": 712, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018361497670412064, + "timestamp": "2025-09-10 02:17:23.050764", + "step": 713, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:23.081369", + "step": 713, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013966246508061886, + "timestamp": "2025-09-10 02:17:23.088194", + "step": 714, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:23.117132", + "step": 714, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010636130347847939, + "timestamp": "2025-09-10 02:17:23.123967", + "step": 715, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:23.154581", + "step": 715, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01876237615942955, + "timestamp": "2025-09-10 02:17:23.185152", + "step": 716, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:17:23.217490", + "step": 716, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008436868898570538, + "timestamp": "2025-09-10 02:17:23.230458", + "step": 717, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:23.260739", + "step": 717, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013500401750206947, + "timestamp": "2025-09-10 02:17:23.271538", + "step": 718, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:17:23.310070", + "step": 718, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01989280991256237, + "timestamp": "2025-09-10 02:17:23.325658", + "step": 719, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:23.357652", + "step": 719, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011787742376327515, + "timestamp": "2025-09-10 02:17:23.389493", + "step": 720, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:23.419435", + "step": 720, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007818952202796936, + "timestamp": "2025-09-10 02:17:23.424154", + "step": 721, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:23.455056", + "step": 721, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02153034135699272, + "timestamp": "2025-09-10 02:17:23.466054", + "step": 722, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:23.496351", + "step": 722, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01763448491692543, + "timestamp": "2025-09-10 02:17:23.503135", + "step": 723, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:23.533766", + "step": 723, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017756912857294083, + "timestamp": "2025-09-10 02:17:23.561649", + "step": 724, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:23.592162", + "step": 724, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024670034646987915, + "timestamp": "2025-09-10 02:17:23.594202", + "step": 725, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:23.623940", + "step": 725, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009984654374420643, + "timestamp": "2025-09-10 02:17:23.628162", + "step": 726, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:23.659122", + "step": 726, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021593144163489342, + "timestamp": "2025-09-10 02:17:23.663205", + "step": 727, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:23.693579", + "step": 727, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025460926815867424, + "timestamp": "2025-09-10 02:17:23.719090", + "step": 728, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:17:23.755523", + "step": 728, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006311932113021612, + "timestamp": "2025-09-10 02:17:23.770962", + "step": 729, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:23.801168", + "step": 729, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013910098932683468, + "timestamp": "2025-09-10 02:17:23.807934", + "step": 730, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:23.839242", + "step": 730, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00481247017160058, + "timestamp": "2025-09-10 02:17:23.851799", + "step": 731, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:23.883378", + "step": 731, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013029472902417183, + "timestamp": "2025-09-10 02:17:23.911603", + "step": 732, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:23.943677", + "step": 732, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004847945179790258, + "timestamp": "2025-09-10 02:17:23.951290", + "step": 733, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:23.982241", + "step": 733, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01138862781226635, + "timestamp": "2025-09-10 02:17:23.986032", + "step": 734, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:24.017503", + "step": 734, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02151215262711048, + "timestamp": "2025-09-10 02:17:24.024494", + "step": 735, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:17:34.139298", + "step": 735, + "epoch": 1 + }, + { + "type": "pplx", + "content": 18458793.49173297, + "timestamp": "2025-09-10 02:17:34.143714", + "step": 735, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:17:34.175489", + "step": 735, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01929275132715702, + "timestamp": "2025-09-10 02:17:34.209678", + "step": 736, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:17:34.246127", + "step": 736, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013413517735898495, + "timestamp": "2025-09-10 02:17:34.261241", + "step": 737, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:34.292957", + "step": 737, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0064349048770964146, + "timestamp": "2025-09-10 02:17:34.300048", + "step": 738, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:34.330515", + "step": 738, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007773600518703461, + "timestamp": "2025-09-10 02:17:34.338056", + "step": 739, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:17:34.369741", + "step": 739, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008945588953793049, + "timestamp": "2025-09-10 02:17:34.393563", + "step": 740, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:34.423783", + "step": 740, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024763548746705055, + "timestamp": "2025-09-10 02:17:34.426047", + "step": 741, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:34.456115", + "step": 741, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020738394930958748, + "timestamp": "2025-09-10 02:17:34.463004", + "step": 742, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:34.493870", + "step": 742, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0035269984509795904, + "timestamp": "2025-09-10 02:17:34.497726", + "step": 743, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:34.528175", + "step": 743, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030427515506744385, + "timestamp": "2025-09-10 02:17:34.553115", + "step": 744, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:34.583845", + "step": 744, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007679258938878775, + "timestamp": "2025-09-10 02:17:34.586081", + "step": 745, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:34.616894", + "step": 745, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028332481160759926, + "timestamp": "2025-09-10 02:17:34.629300", + "step": 746, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:34.659701", + "step": 746, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008473207242786884, + "timestamp": "2025-09-10 02:17:34.666562", + "step": 747, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:34.697133", + "step": 747, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017313247546553612, + "timestamp": "2025-09-10 02:17:34.728079", + "step": 748, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:34.758141", + "step": 748, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013101726770401001, + "timestamp": "2025-09-10 02:17:34.762842", + "step": 749, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:17:34.795984", + "step": 749, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0032856224570423365, + "timestamp": "2025-09-10 02:17:34.809377", + "step": 750, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:34.839594", + "step": 750, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011557753197848797, + "timestamp": "2025-09-10 02:17:34.846505", + "step": 751, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:34.877292", + "step": 751, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0019083227962255478, + "timestamp": "2025-09-10 02:17:34.904773", + "step": 752, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:34.936422", + "step": 752, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015568030066788197, + "timestamp": "2025-09-10 02:17:34.943847", + "step": 753, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:34.974977", + "step": 753, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004572854842990637, + "timestamp": "2025-09-10 02:17:34.985500", + "step": 754, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:35.015995", + "step": 754, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.000890351424459368, + "timestamp": "2025-09-10 02:17:35.023548", + "step": 755, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:35.053850", + "step": 755, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01434353832155466, + "timestamp": "2025-09-10 02:17:35.079127", + "step": 756, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:35.110114", + "step": 756, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016249870881438255, + "timestamp": "2025-09-10 02:17:35.114852", + "step": 757, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:17:35.156515", + "step": 757, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038629692047834396, + "timestamp": "2025-09-10 02:17:35.173760", + "step": 758, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:35.205032", + "step": 758, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05195966735482216, + "timestamp": "2025-09-10 02:17:35.217498", + "step": 759, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:35.247998", + "step": 759, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03985142335295677, + "timestamp": "2025-09-10 02:17:35.275939", + "step": 760, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:35.306453", + "step": 760, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012833379209041595, + "timestamp": "2025-09-10 02:17:35.310647", + "step": 761, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:35.341151", + "step": 761, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018250539898872375, + "timestamp": "2025-09-10 02:17:35.353575", + "step": 762, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:17:35.387132", + "step": 762, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006811958272010088, + "timestamp": "2025-09-10 02:17:35.401072", + "step": 763, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:35.431899", + "step": 763, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01085501629859209, + "timestamp": "2025-09-10 02:17:35.459535", + "step": 764, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:35.489598", + "step": 764, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034105248749256134, + "timestamp": "2025-09-10 02:17:35.494325", + "step": 765, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:35.524429", + "step": 765, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007085829973220825, + "timestamp": "2025-09-10 02:17:35.531214", + "step": 766, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:17:35.569083", + "step": 766, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038453133311122656, + "timestamp": "2025-09-10 02:17:35.584662", + "step": 767, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:35.616279", + "step": 767, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009612992405891418, + "timestamp": "2025-09-10 02:17:35.644013", + "step": 768, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:35.675253", + "step": 768, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012257935479283333, + "timestamp": "2025-09-10 02:17:35.677227", + "step": 769, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:35.708722", + "step": 769, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01698746345937252, + "timestamp": "2025-09-10 02:17:35.720306", + "step": 770, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:35.751747", + "step": 770, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01926126517355442, + "timestamp": "2025-09-10 02:17:35.758927", + "step": 771, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:35.791395", + "step": 771, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01315612904727459, + "timestamp": "2025-09-10 02:17:35.815732", + "step": 772, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:35.847862", + "step": 772, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017368396744132042, + "timestamp": "2025-09-10 02:17:35.856892", + "step": 773, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:35.892080", + "step": 773, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003305921098217368, + "timestamp": "2025-09-10 02:17:35.899155", + "step": 774, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:35.935778", + "step": 774, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006261749658733606, + "timestamp": "2025-09-10 02:17:35.942911", + "step": 775, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:35.989877", + "step": 775, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025168852880597115, + "timestamp": "2025-09-10 02:17:36.015181", + "step": 776, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:36.052838", + "step": 776, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014582036063075066, + "timestamp": "2025-09-10 02:17:36.057215", + "step": 777, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:36.095077", + "step": 777, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015908481553196907, + "timestamp": "2025-09-10 02:17:36.105369", + "step": 778, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:36.136298", + "step": 778, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004383227322250605, + "timestamp": "2025-09-10 02:17:36.142909", + "step": 779, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:36.173944", + "step": 779, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012968703173100948, + "timestamp": "2025-09-10 02:17:36.201145", + "step": 780, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:36.231798", + "step": 780, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015527973882853985, + "timestamp": "2025-09-10 02:17:36.236691", + "step": 781, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:17:36.271993", + "step": 781, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015053192153573036, + "timestamp": "2025-09-10 02:17:36.285682", + "step": 782, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:36.317256", + "step": 782, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012528965249657631, + "timestamp": "2025-09-10 02:17:36.324049", + "step": 783, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:17:36.362027", + "step": 783, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002056631725281477, + "timestamp": "2025-09-10 02:17:36.398818", + "step": 784, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:36.430314", + "step": 784, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004613164346665144, + "timestamp": "2025-09-10 02:17:36.439249", + "step": 785, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:17:36.477062", + "step": 785, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04229161515831947, + "timestamp": "2025-09-10 02:17:36.492661", + "step": 786, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:36.523478", + "step": 786, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03430848568677902, + "timestamp": "2025-09-10 02:17:36.530252", + "step": 787, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:17:36.563970", + "step": 787, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006993894465267658, + "timestamp": "2025-09-10 02:17:36.598606", + "step": 788, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:36.632384", + "step": 788, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004820824600756168, + "timestamp": "2025-09-10 02:17:36.636484", + "step": 789, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:36.667555", + "step": 789, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0256601981818676, + "timestamp": "2025-09-10 02:17:36.674098", + "step": 790, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 15662185694400 + }, + "timestamp": "2025-09-10 02:17:36.721295", + "step": 790, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013889133930206299, + "timestamp": "2025-09-10 02:17:36.740383", + "step": 791, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:36.771718", + "step": 791, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016334451735019684, + "timestamp": "2025-09-10 02:17:36.799678", + "step": 792, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:36.830397", + "step": 792, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013368922285735607, + "timestamp": "2025-09-10 02:17:36.834885", + "step": 793, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:36.867576", + "step": 793, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031976792961359024, + "timestamp": "2025-09-10 02:17:36.874661", + "step": 794, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:36.911198", + "step": 794, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0013559797080233693, + "timestamp": "2025-09-10 02:17:36.918054", + "step": 795, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:17:36.957012", + "step": 795, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04322435334324837, + "timestamp": "2025-09-10 02:17:36.991235", + "step": 796, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:17:37.031780", + "step": 796, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0115485405549407, + "timestamp": "2025-09-10 02:17:37.044793", + "step": 797, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:37.083728", + "step": 797, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003736104816198349, + "timestamp": "2025-09-10 02:17:37.090801", + "step": 798, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:37.124580", + "step": 798, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014022842049598694, + "timestamp": "2025-09-10 02:17:37.128190", + "step": 799, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:37.159139", + "step": 799, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013005274347960949, + "timestamp": "2025-09-10 02:17:37.182616", + "step": 800, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:17:37.213215", + "step": 800, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02133549191057682, + "timestamp": "2025-09-10 02:17:37.215490", + "step": 801, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:37.247157", + "step": 801, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0029299429152160883, + "timestamp": "2025-09-10 02:17:37.259354", + "step": 802, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:37.295020", + "step": 802, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0014492860063910484, + "timestamp": "2025-09-10 02:17:37.297558", + "step": 803, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:17:37.333756", + "step": 803, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021839609369635582, + "timestamp": "2025-09-10 02:17:37.368272", + "step": 804, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:37.400485", + "step": 804, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030333133414387703, + "timestamp": "2025-09-10 02:17:37.404770", + "step": 805, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:37.437794", + "step": 805, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006924864836037159, + "timestamp": "2025-09-10 02:17:37.448090", + "step": 806, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:37.479748", + "step": 806, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0072951540350914, + "timestamp": "2025-09-10 02:17:37.486157", + "step": 807, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:37.517782", + "step": 807, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006309094373136759, + "timestamp": "2025-09-10 02:17:37.545367", + "step": 808, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:37.580837", + "step": 808, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00040522878407500684, + "timestamp": "2025-09-10 02:17:37.585120", + "step": 809, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:37.617472", + "step": 809, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006362107116729021, + "timestamp": "2025-09-10 02:17:37.619848", + "step": 810, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:37.651402", + "step": 810, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008096226491034031, + "timestamp": "2025-09-10 02:17:37.658397", + "step": 811, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:17:37.691987", + "step": 811, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007765918970108032, + "timestamp": "2025-09-10 02:17:37.726560", + "step": 812, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:37.758589", + "step": 812, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019435886293649673, + "timestamp": "2025-09-10 02:17:37.762951", + "step": 813, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:37.794126", + "step": 813, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009066428057849407, + "timestamp": "2025-09-10 02:17:37.797550", + "step": 814, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:37.828748", + "step": 814, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0014025976415723562, + "timestamp": "2025-09-10 02:17:37.838601", + "step": 815, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:37.870535", + "step": 815, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023165103048086166, + "timestamp": "2025-09-10 02:17:37.900977", + "step": 816, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:17:37.933424", + "step": 816, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012905867770314217, + "timestamp": "2025-09-10 02:17:37.946463", + "step": 817, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:17:37.979295", + "step": 817, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023732444271445274, + "timestamp": "2025-09-10 02:17:37.992647", + "step": 818, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:38.024568", + "step": 818, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03576406463980675, + "timestamp": "2025-09-10 02:17:38.033872", + "step": 819, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:38.064401", + "step": 819, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039100583642721176, + "timestamp": "2025-09-10 02:17:38.089401", + "step": 820, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:38.120591", + "step": 820, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019176315516233444, + "timestamp": "2025-09-10 02:17:38.124732", + "step": 821, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:38.156050", + "step": 821, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06485612690448761, + "timestamp": "2025-09-10 02:17:38.163315", + "step": 822, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:17:38.193669", + "step": 822, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0012710961746051908, + "timestamp": "2025-09-10 02:17:38.197109", + "step": 823, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:38.227795", + "step": 823, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002674340968951583, + "timestamp": "2025-09-10 02:17:38.252966", + "step": 824, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:38.284796", + "step": 824, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005030173342674971, + "timestamp": "2025-09-10 02:17:38.291925", + "step": 825, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:38.324273", + "step": 825, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033865850418806076, + "timestamp": "2025-09-10 02:17:38.327924", + "step": 826, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:38.359190", + "step": 826, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.051615625619888306, + "timestamp": "2025-09-10 02:17:38.363253", + "step": 827, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:38.393455", + "step": 827, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005924302618950605, + "timestamp": "2025-09-10 02:17:38.421082", + "step": 828, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:38.452362", + "step": 828, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009351923130452633, + "timestamp": "2025-09-10 02:17:38.461365", + "step": 829, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:38.492734", + "step": 829, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014958539046347141, + "timestamp": "2025-09-10 02:17:38.500339", + "step": 830, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:38.532470", + "step": 830, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009349385276436806, + "timestamp": "2025-09-10 02:17:38.538976", + "step": 831, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:38.570077", + "step": 831, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026025842875242233, + "timestamp": "2025-09-10 02:17:38.603218", + "step": 832, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:38.635495", + "step": 832, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018966345116496086, + "timestamp": "2025-09-10 02:17:38.642712", + "step": 833, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:38.674175", + "step": 833, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04127897694706917, + "timestamp": "2025-09-10 02:17:38.680832", + "step": 834, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:38.712426", + "step": 834, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04661082848906517, + "timestamp": "2025-09-10 02:17:38.719223", + "step": 835, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:38.751110", + "step": 835, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022765228524804115, + "timestamp": "2025-09-10 02:17:38.778922", + "step": 836, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:38.809955", + "step": 836, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01644345186650753, + "timestamp": "2025-09-10 02:17:38.818849", + "step": 837, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:17:38.857840", + "step": 837, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046977002173662186, + "timestamp": "2025-09-10 02:17:38.873689", + "step": 838, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:38.905257", + "step": 838, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02852710708975792, + "timestamp": "2025-09-10 02:17:38.909440", + "step": 839, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:38.939934", + "step": 839, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04006481543183327, + "timestamp": "2025-09-10 02:17:38.967495", + "step": 840, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:17:39.007106", + "step": 840, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026953106746077538, + "timestamp": "2025-09-10 02:17:39.023998", + "step": 841, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:39.054804", + "step": 841, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010299399495124817, + "timestamp": "2025-09-10 02:17:39.061572", + "step": 842, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:39.091752", + "step": 842, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018122700974345207, + "timestamp": "2025-09-10 02:17:39.094076", + "step": 843, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:39.125506", + "step": 843, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015159577131271362, + "timestamp": "2025-09-10 02:17:39.153533", + "step": 844, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:17:39.190048", + "step": 844, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017123881727457047, + "timestamp": "2025-09-10 02:17:39.205650", + "step": 845, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:17:39.240889", + "step": 845, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005689023993909359, + "timestamp": "2025-09-10 02:17:39.254559", + "step": 846, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:17:39.288982", + "step": 846, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016331713646650314, + "timestamp": "2025-09-10 02:17:39.302923", + "step": 847, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:39.335924", + "step": 847, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022277653217315674, + "timestamp": "2025-09-10 02:17:39.360399", + "step": 848, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:39.392440", + "step": 848, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027887245640158653, + "timestamp": "2025-09-10 02:17:39.394610", + "step": 849, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:39.425130", + "step": 849, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006013993173837662, + "timestamp": "2025-09-10 02:17:39.435458", + "step": 850, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:39.466820", + "step": 850, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00961573701351881, + "timestamp": "2025-09-10 02:17:39.473322", + "step": 851, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:39.504528", + "step": 851, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028055304661393166, + "timestamp": "2025-09-10 02:17:39.534905", + "step": 852, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:17:39.574164", + "step": 852, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010051725432276726, + "timestamp": "2025-09-10 02:17:39.591067", + "step": 853, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:17:39.630072", + "step": 853, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008215261623263359, + "timestamp": "2025-09-10 02:17:39.645899", + "step": 854, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:39.677467", + "step": 854, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014234711416065693, + "timestamp": "2025-09-10 02:17:39.683693", + "step": 855, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:39.715222", + "step": 855, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011703640222549438, + "timestamp": "2025-09-10 02:17:39.745554", + "step": 856, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:39.777271", + "step": 856, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010386110283434391, + "timestamp": "2025-09-10 02:17:39.786202", + "step": 857, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:39.818376", + "step": 857, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026391830295324326, + "timestamp": "2025-09-10 02:17:39.825003", + "step": 858, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:39.856397", + "step": 858, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02559771202504635, + "timestamp": "2025-09-10 02:17:39.862969", + "step": 859, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:17:39.899353", + "step": 859, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019128460437059402, + "timestamp": "2025-09-10 02:17:39.934178", + "step": 860, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:17:39.966353", + "step": 860, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00833536684513092, + "timestamp": "2025-09-10 02:17:39.978583", + "step": 861, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:40.009753", + "step": 861, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012589543126523495, + "timestamp": "2025-09-10 02:17:40.016543", + "step": 862, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:40.049684", + "step": 862, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006604044698178768, + "timestamp": "2025-09-10 02:17:40.053898", + "step": 863, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:40.086709", + "step": 863, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02710030786693096, + "timestamp": "2025-09-10 02:17:40.114207", + "step": 864, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:40.145830", + "step": 864, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006634250283241272, + "timestamp": "2025-09-10 02:17:40.153831", + "step": 865, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:40.184219", + "step": 865, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03137756139039993, + "timestamp": "2025-09-10 02:17:40.190972", + "step": 866, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:40.222239", + "step": 866, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02019382454454899, + "timestamp": "2025-09-10 02:17:40.229805", + "step": 867, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:40.260687", + "step": 867, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00791159924119711, + "timestamp": "2025-09-10 02:17:40.288245", + "step": 868, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:40.318958", + "step": 868, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005334521643817425, + "timestamp": "2025-09-10 02:17:40.323049", + "step": 869, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:40.354600", + "step": 869, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009703114628791809, + "timestamp": "2025-09-10 02:17:40.358892", + "step": 870, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:40.390479", + "step": 870, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029153967276215553, + "timestamp": "2025-09-10 02:17:40.397821", + "step": 871, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:40.428409", + "step": 871, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007150101009756327, + "timestamp": "2025-09-10 02:17:40.451663", + "step": 872, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:40.482550", + "step": 872, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008403966203331947, + "timestamp": "2025-09-10 02:17:40.486683", + "step": 873, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:17:40.519587", + "step": 873, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029630528762936592, + "timestamp": "2025-09-10 02:17:40.532895", + "step": 874, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:40.564255", + "step": 874, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011236722581088543, + "timestamp": "2025-09-10 02:17:40.571169", + "step": 875, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:40.602266", + "step": 875, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01793195679783821, + "timestamp": "2025-09-10 02:17:40.630123", + "step": 876, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:40.660927", + "step": 876, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004515457898378372, + "timestamp": "2025-09-10 02:17:40.663538", + "step": 877, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:40.694461", + "step": 877, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01135172974318266, + "timestamp": "2025-09-10 02:17:40.698873", + "step": 878, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:40.729815", + "step": 878, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00693327933549881, + "timestamp": "2025-09-10 02:17:40.739606", + "step": 879, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:40.771350", + "step": 879, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017033789306879044, + "timestamp": "2025-09-10 02:17:40.799018", + "step": 880, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:40.829888", + "step": 880, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012997663579881191, + "timestamp": "2025-09-10 02:17:40.839821", + "step": 881, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:40.870538", + "step": 881, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009415126405656338, + "timestamp": "2025-09-10 02:17:40.881247", + "step": 882, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:17:51.050797", + "step": 882, + "epoch": 1 + }, + { + "type": "pplx", + "content": 14730864.383457733, + "timestamp": "2025-09-10 02:17:51.055567", + "step": 882, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:51.091661", + "step": 882, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022233616560697556, + "timestamp": "2025-09-10 02:17:51.099797", + "step": 883, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:51.135781", + "step": 883, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009659935720264912, + "timestamp": "2025-09-10 02:17:51.163204", + "step": 884, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:51.201257", + "step": 884, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030786585062742233, + "timestamp": "2025-09-10 02:17:51.207893", + "step": 885, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:51.238953", + "step": 885, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021192189306020737, + "timestamp": "2025-09-10 02:17:51.246685", + "step": 886, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 784 + ], + "flops": 23255845310656 + }, + "timestamp": "2025-09-10 02:17:51.318129", + "step": 886, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04029746726155281, + "timestamp": "2025-09-10 02:17:51.345232", + "step": 887, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:51.376862", + "step": 887, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006447978317737579, + "timestamp": "2025-09-10 02:17:51.410235", + "step": 888, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:51.441644", + "step": 888, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02322995476424694, + "timestamp": "2025-09-10 02:17:51.443876", + "step": 889, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:51.474542", + "step": 889, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010987287387251854, + "timestamp": "2025-09-10 02:17:51.485276", + "step": 890, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:51.516512", + "step": 890, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02331923507153988, + "timestamp": "2025-09-10 02:17:51.526594", + "step": 891, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:17:51.565807", + "step": 891, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003565514227375388, + "timestamp": "2025-09-10 02:17:51.602876", + "step": 892, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:51.633770", + "step": 892, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04227833077311516, + "timestamp": "2025-09-10 02:17:51.636076", + "step": 893, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:51.667184", + "step": 893, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021429577842354774, + "timestamp": "2025-09-10 02:17:51.674087", + "step": 894, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:51.705630", + "step": 894, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03835199028253555, + "timestamp": "2025-09-10 02:17:51.713251", + "step": 895, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:51.743135", + "step": 895, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0153651786968112, + "timestamp": "2025-09-10 02:17:51.768400", + "step": 896, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:51.801241", + "step": 896, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0387214757502079, + "timestamp": "2025-09-10 02:17:51.803342", + "step": 897, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:51.832852", + "step": 897, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0020098849199712276, + "timestamp": "2025-09-10 02:17:51.837127", + "step": 898, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:51.867212", + "step": 898, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009437446482479572, + "timestamp": "2025-09-10 02:17:51.875034", + "step": 899, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:51.906254", + "step": 899, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025736164301633835, + "timestamp": "2025-09-10 02:17:51.934806", + "step": 900, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:51.966020", + "step": 900, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001165196648798883, + "timestamp": "2025-09-10 02:17:51.968310", + "step": 901, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:52.000301", + "step": 901, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019891690462827682, + "timestamp": "2025-09-10 02:17:52.008143", + "step": 902, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:52.039005", + "step": 902, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03028137981891632, + "timestamp": "2025-09-10 02:17:52.045936", + "step": 903, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:17:52.084242", + "step": 903, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0014906581491231918, + "timestamp": "2025-09-10 02:17:52.120703", + "step": 904, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:52.151409", + "step": 904, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009351144544780254, + "timestamp": "2025-09-10 02:17:52.159191", + "step": 905, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:52.189988", + "step": 905, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032125215977430344, + "timestamp": "2025-09-10 02:17:52.193955", + "step": 906, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 14712978242368 + }, + "timestamp": "2025-09-10 02:17:52.237212", + "step": 906, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02834523655474186, + "timestamp": "2025-09-10 02:17:52.254904", + "step": 907, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:17:52.290019", + "step": 907, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011470122262835503, + "timestamp": "2025-09-10 02:17:52.324553", + "step": 908, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:52.357249", + "step": 908, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03763606771826744, + "timestamp": "2025-09-10 02:17:52.359462", + "step": 909, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:52.390369", + "step": 909, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0020886852871626616, + "timestamp": "2025-09-10 02:17:52.397115", + "step": 910, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:52.428896", + "step": 910, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00491158664226532, + "timestamp": "2025-09-10 02:17:52.436093", + "step": 911, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:52.466335", + "step": 911, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010647162795066833, + "timestamp": "2025-09-10 02:17:52.494032", + "step": 912, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:52.525074", + "step": 912, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02704322710633278, + "timestamp": "2025-09-10 02:17:52.527308", + "step": 913, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:52.558028", + "step": 913, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03703900799155235, + "timestamp": "2025-09-10 02:17:52.565527", + "step": 914, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:52.595613", + "step": 914, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05229032784700394, + "timestamp": "2025-09-10 02:17:52.603099", + "step": 915, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:52.632844", + "step": 915, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02464185282588005, + "timestamp": "2025-09-10 02:17:52.657740", + "step": 916, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:52.687050", + "step": 916, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029760537669062614, + "timestamp": "2025-09-10 02:17:52.688949", + "step": 917, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:52.719815", + "step": 917, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023682432249188423, + "timestamp": "2025-09-10 02:17:52.727378", + "step": 918, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:52.758497", + "step": 918, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0028019100427627563, + "timestamp": "2025-09-10 02:17:52.766244", + "step": 919, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:52.796453", + "step": 919, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013780993409454823, + "timestamp": "2025-09-10 02:17:52.824088", + "step": 920, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:52.854433", + "step": 920, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003197154263034463, + "timestamp": "2025-09-10 02:17:52.858986", + "step": 921, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:52.890585", + "step": 921, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002631398383527994, + "timestamp": "2025-09-10 02:17:52.903136", + "step": 922, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:52.934809", + "step": 922, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01756918616592884, + "timestamp": "2025-09-10 02:17:52.945697", + "step": 923, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:52.976339", + "step": 923, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022316312417387962, + "timestamp": "2025-09-10 02:17:53.001800", + "step": 924, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:53.033155", + "step": 924, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018341967836022377, + "timestamp": "2025-09-10 02:17:53.040820", + "step": 925, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:17:53.075483", + "step": 925, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007413599174469709, + "timestamp": "2025-09-10 02:17:53.089315", + "step": 926, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:53.119379", + "step": 926, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02032196894288063, + "timestamp": "2025-09-10 02:17:53.126669", + "step": 927, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:53.157084", + "step": 927, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007340454496443272, + "timestamp": "2025-09-10 02:17:53.185787", + "step": 928, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:53.218495", + "step": 928, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017748655751347542, + "timestamp": "2025-09-10 02:17:53.226305", + "step": 929, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:53.256717", + "step": 929, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.042677875608205795, + "timestamp": "2025-09-10 02:17:53.260877", + "step": 930, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:53.291560", + "step": 930, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009248084388673306, + "timestamp": "2025-09-10 02:17:53.299141", + "step": 931, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:53.330419", + "step": 931, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015127205289900303, + "timestamp": "2025-09-10 02:17:53.361965", + "step": 932, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:53.392356", + "step": 932, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02554660104215145, + "timestamp": "2025-09-10 02:17:53.394539", + "step": 933, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:53.425626", + "step": 933, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018056869506835938, + "timestamp": "2025-09-10 02:17:53.437773", + "step": 934, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:53.468052", + "step": 934, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039137158542871475, + "timestamp": "2025-09-10 02:17:53.475535", + "step": 935, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:53.505811", + "step": 935, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03655305504798889, + "timestamp": "2025-09-10 02:17:53.530566", + "step": 936, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:53.563734", + "step": 936, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02264043502509594, + "timestamp": "2025-09-10 02:17:53.571984", + "step": 937, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:53.603803", + "step": 937, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0072896406054496765, + "timestamp": "2025-09-10 02:17:53.607995", + "step": 938, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:53.639895", + "step": 938, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01063856016844511, + "timestamp": "2025-09-10 02:17:53.646943", + "step": 939, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:53.677247", + "step": 939, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012549477629363537, + "timestamp": "2025-09-10 02:17:53.705519", + "step": 940, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:17:53.742721", + "step": 940, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007854852825403214, + "timestamp": "2025-09-10 02:17:53.758153", + "step": 941, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:53.789869", + "step": 941, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008021929301321507, + "timestamp": "2025-09-10 02:17:53.797362", + "step": 942, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:53.828747", + "step": 942, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03075227700173855, + "timestamp": "2025-09-10 02:17:53.836399", + "step": 943, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:53.867457", + "step": 943, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012394532561302185, + "timestamp": "2025-09-10 02:17:53.892784", + "step": 944, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:53.924195", + "step": 944, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025795314460992813, + "timestamp": "2025-09-10 02:17:53.928702", + "step": 945, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:53.965401", + "step": 945, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03643295168876648, + "timestamp": "2025-09-10 02:17:53.972943", + "step": 946, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:54.012422", + "step": 946, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01099133025854826, + "timestamp": "2025-09-10 02:17:54.019892", + "step": 947, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:54.055067", + "step": 947, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010035491548478603, + "timestamp": "2025-09-10 02:17:54.082908", + "step": 948, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:54.130125", + "step": 948, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018783031031489372, + "timestamp": "2025-09-10 02:17:54.135462", + "step": 949, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:54.172827", + "step": 949, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0065679592080414295, + "timestamp": "2025-09-10 02:17:54.179688", + "step": 950, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:54.227461", + "step": 950, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030873224139213562, + "timestamp": "2025-09-10 02:17:54.234850", + "step": 951, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:54.267001", + "step": 951, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00860142894089222, + "timestamp": "2025-09-10 02:17:54.298805", + "step": 952, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:54.329849", + "step": 952, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02184119261801243, + "timestamp": "2025-09-10 02:17:54.334802", + "step": 953, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:54.365625", + "step": 953, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009527009911835194, + "timestamp": "2025-09-10 02:17:54.375907", + "step": 954, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:17:54.408819", + "step": 954, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020068276673555374, + "timestamp": "2025-09-10 02:17:54.422151", + "step": 955, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:54.457236", + "step": 955, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010814903303980827, + "timestamp": "2025-09-10 02:17:54.488267", + "step": 956, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:54.520071", + "step": 956, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02371845953166485, + "timestamp": "2025-09-10 02:17:54.525943", + "step": 957, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:54.565905", + "step": 957, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029727578163146973, + "timestamp": "2025-09-10 02:17:54.571947", + "step": 958, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:54.603808", + "step": 958, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05049288645386696, + "timestamp": "2025-09-10 02:17:54.614183", + "step": 959, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:54.646310", + "step": 959, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00983439851552248, + "timestamp": "2025-09-10 02:17:54.671570", + "step": 960, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:54.705032", + "step": 960, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02309414930641651, + "timestamp": "2025-09-10 02:17:54.711307", + "step": 961, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:17:54.751183", + "step": 961, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0041077896021306515, + "timestamp": "2025-09-10 02:17:54.765144", + "step": 962, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:54.798115", + "step": 962, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0642273798584938, + "timestamp": "2025-09-10 02:17:54.810293", + "step": 963, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:17:54.845738", + "step": 963, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007594208233058453, + "timestamp": "2025-09-10 02:17:54.880449", + "step": 964, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:54.910901", + "step": 964, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021292701363563538, + "timestamp": "2025-09-10 02:17:54.915566", + "step": 965, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:54.947901", + "step": 965, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00916915200650692, + "timestamp": "2025-09-10 02:17:54.951819", + "step": 966, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:54.983075", + "step": 966, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006665355525910854, + "timestamp": "2025-09-10 02:17:54.990165", + "step": 967, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:55.025106", + "step": 967, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012122230604290962, + "timestamp": "2025-09-10 02:17:55.058652", + "step": 968, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 576 + ], + "flops": 17085996872448 + }, + "timestamp": "2025-09-10 02:17:55.103815", + "step": 968, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0122977988794446, + "timestamp": "2025-09-10 02:17:55.123080", + "step": 969, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:55.154056", + "step": 969, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00949710514396429, + "timestamp": "2025-09-10 02:17:55.164111", + "step": 970, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:55.194022", + "step": 970, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018477456644177437, + "timestamp": "2025-09-10 02:17:55.198355", + "step": 971, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:55.229368", + "step": 971, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014558763243258, + "timestamp": "2025-09-10 02:17:55.257943", + "step": 972, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:17:55.294468", + "step": 972, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008184276521205902, + "timestamp": "2025-09-10 02:17:55.307496", + "step": 973, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:17:55.343894", + "step": 973, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017617663368582726, + "timestamp": "2025-09-10 02:17:55.349012", + "step": 974, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:55.385405", + "step": 974, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013045444153249264, + "timestamp": "2025-09-10 02:17:55.389583", + "step": 975, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:55.421516", + "step": 975, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02563711628317833, + "timestamp": "2025-09-10 02:17:55.451436", + "step": 976, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:55.481948", + "step": 976, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013601159676909447, + "timestamp": "2025-09-10 02:17:55.484011", + "step": 977, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:17:55.514755", + "step": 977, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026903489604592323, + "timestamp": "2025-09-10 02:17:55.517153", + "step": 978, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:55.548412", + "step": 978, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008285568095743656, + "timestamp": "2025-09-10 02:17:55.552794", + "step": 979, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:17:55.584495", + "step": 979, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008680049329996109, + "timestamp": "2025-09-10 02:17:55.615365", + "step": 980, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:55.646309", + "step": 980, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004374523181468248, + "timestamp": "2025-09-10 02:17:55.651294", + "step": 981, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:17:55.682655", + "step": 981, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018112564459443092, + "timestamp": "2025-09-10 02:17:55.693510", + "step": 982, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:17:55.723932", + "step": 982, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017343124374747276, + "timestamp": "2025-09-10 02:17:55.728098", + "step": 983, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:55.757844", + "step": 983, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009760797023773193, + "timestamp": "2025-09-10 02:17:55.785672", + "step": 984, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:17:55.816268", + "step": 984, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010807998478412628, + "timestamp": "2025-09-10 02:17:55.819473", + "step": 985, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:17:55.853423", + "step": 985, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024396957829594612, + "timestamp": "2025-09-10 02:17:55.856074", + "step": 986, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:55.887069", + "step": 986, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02711336500942707, + "timestamp": "2025-09-10 02:17:55.894496", + "step": 987, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:17:55.931416", + "step": 987, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006434720940887928, + "timestamp": "2025-09-10 02:17:55.965598", + "step": 988, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:56.000656", + "step": 988, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02933250367641449, + "timestamp": "2025-09-10 02:17:56.002861", + "step": 989, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:56.035652", + "step": 989, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04050236940383911, + "timestamp": "2025-09-10 02:17:56.047816", + "step": 990, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:17:56.083529", + "step": 990, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03573581948876381, + "timestamp": "2025-09-10 02:17:56.090451", + "step": 991, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:56.137426", + "step": 991, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013186642900109291, + "timestamp": "2025-09-10 02:17:56.165631", + "step": 992, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:17:56.198381", + "step": 992, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00814458541572094, + "timestamp": "2025-09-10 02:17:56.202916", + "step": 993, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:17:56.235284", + "step": 993, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04362935200333595, + "timestamp": "2025-09-10 02:17:56.239732", + "step": 994, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:56.272860", + "step": 994, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005004457198083401, + "timestamp": "2025-09-10 02:17:56.280773", + "step": 995, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:17:56.312241", + "step": 995, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026401042938232422, + "timestamp": "2025-09-10 02:17:56.340827", + "step": 996, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:17:56.371718", + "step": 996, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014147581532597542, + "timestamp": "2025-09-10 02:17:56.381443", + "step": 997, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:56.411960", + "step": 997, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005374276544898748, + "timestamp": "2025-09-10 02:17:56.419394", + "step": 998, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:17:56.450530", + "step": 998, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00914605613797903, + "timestamp": "2025-09-10 02:17:56.457781", + "step": 999, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:17:56.489128", + "step": 999, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01612034998834133, + "timestamp": "2025-09-10 02:17:56.522591", + "step": 1000, + "epoch": 1 + }, + { + "type": "info", + "content": "Checkpoint saved at step 1000", + "timestamp": "2025-09-10 02:18:01.459465", + "step": 1000, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:01.491383", + "step": 1000, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0174104031175375, + "timestamp": "2025-09-10 02:18:01.494305", + "step": 1001, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:01.528010", + "step": 1001, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026182083413004875, + "timestamp": "2025-09-10 02:18:01.539594", + "step": 1002, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:01.570881", + "step": 1002, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03149298205971718, + "timestamp": "2025-09-10 02:18:01.574600", + "step": 1003, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:01.606718", + "step": 1003, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02260902337729931, + "timestamp": "2025-09-10 02:18:01.637592", + "step": 1004, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:01.668452", + "step": 1004, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009301579557359219, + "timestamp": "2025-09-10 02:18:01.676113", + "step": 1005, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:01.707010", + "step": 1005, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017554203048348427, + "timestamp": "2025-09-10 02:18:01.714003", + "step": 1006, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:01.744576", + "step": 1006, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02697034180164337, + "timestamp": "2025-09-10 02:18:01.748615", + "step": 1007, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:01.778798", + "step": 1007, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013858512975275517, + "timestamp": "2025-09-10 02:18:01.806623", + "step": 1008, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:01.837166", + "step": 1008, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01846943609416485, + "timestamp": "2025-09-10 02:18:01.842178", + "step": 1009, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:01.872899", + "step": 1009, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028651878237724304, + "timestamp": "2025-09-10 02:18:01.879770", + "step": 1010, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:01.910494", + "step": 1010, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015424097888171673, + "timestamp": "2025-09-10 02:18:01.920098", + "step": 1011, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:01.954475", + "step": 1011, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03452470153570175, + "timestamp": "2025-09-10 02:18:01.979541", + "step": 1012, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:02.009239", + "step": 1012, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01232621818780899, + "timestamp": "2025-09-10 02:18:02.011397", + "step": 1013, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:02.040787", + "step": 1013, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006808358710259199, + "timestamp": "2025-09-10 02:18:02.045323", + "step": 1014, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:02.076071", + "step": 1014, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011624851264059544, + "timestamp": "2025-09-10 02:18:02.086817", + "step": 1015, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:02.117025", + "step": 1015, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024632567539811134, + "timestamp": "2025-09-10 02:18:02.145882", + "step": 1016, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:02.176481", + "step": 1016, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017971431836485863, + "timestamp": "2025-09-10 02:18:02.181117", + "step": 1017, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:02.214734", + "step": 1017, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0073992046527564526, + "timestamp": "2025-09-10 02:18:02.225624", + "step": 1018, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:18:02.263494", + "step": 1018, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01738920249044895, + "timestamp": "2025-09-10 02:18:02.279509", + "step": 1019, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:02.310208", + "step": 1019, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010011442936956882, + "timestamp": "2025-09-10 02:18:02.333868", + "step": 1020, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:18:02.363806", + "step": 1020, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01954047754406929, + "timestamp": "2025-09-10 02:18:02.366349", + "step": 1021, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:02.396560", + "step": 1021, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021268155425786972, + "timestamp": "2025-09-10 02:18:02.403748", + "step": 1022, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:02.434350", + "step": 1022, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023624001070857048, + "timestamp": "2025-09-10 02:18:02.446757", + "step": 1023, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:18:02.487085", + "step": 1023, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025291163474321365, + "timestamp": "2025-09-10 02:18:02.517046", + "step": 1024, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:18:02.552710", + "step": 1024, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014068282209336758, + "timestamp": "2025-09-10 02:18:02.568165", + "step": 1025, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:02.602462", + "step": 1025, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01854473166167736, + "timestamp": "2025-09-10 02:18:02.615856", + "step": 1026, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:02.648053", + "step": 1026, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007462997920811176, + "timestamp": "2025-09-10 02:18:02.660020", + "step": 1027, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:18:02.695010", + "step": 1027, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008165022358298302, + "timestamp": "2025-09-10 02:18:02.729907", + "step": 1028, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:02.759893", + "step": 1028, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00575115904211998, + "timestamp": "2025-09-10 02:18:02.768441", + "step": 1029, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:18:12.958862", + "step": 1029, + "epoch": 1 + }, + { + "type": "pplx", + "content": 12598956.534986155, + "timestamp": "2025-09-10 02:18:12.961843", + "step": 1029, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:12.993434", + "step": 1029, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025716153904795647, + "timestamp": "2025-09-10 02:18:13.001709", + "step": 1030, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:13.032761", + "step": 1030, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011754123494029045, + "timestamp": "2025-09-10 02:18:13.037008", + "step": 1031, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:13.067369", + "step": 1031, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01727590523660183, + "timestamp": "2025-09-10 02:18:13.091208", + "step": 1032, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:13.122124", + "step": 1032, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014728769659996033, + "timestamp": "2025-09-10 02:18:13.126760", + "step": 1033, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:13.157423", + "step": 1033, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01519166398793459, + "timestamp": "2025-09-10 02:18:13.160059", + "step": 1034, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:13.190660", + "step": 1034, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003654760541394353, + "timestamp": "2025-09-10 02:18:13.198324", + "step": 1035, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:13.229987", + "step": 1035, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013432272709906101, + "timestamp": "2025-09-10 02:18:13.257838", + "step": 1036, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:13.289765", + "step": 1036, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017557019367814064, + "timestamp": "2025-09-10 02:18:13.300314", + "step": 1037, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:13.331706", + "step": 1037, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008299489505589008, + "timestamp": "2025-09-10 02:18:13.335575", + "step": 1038, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:13.366588", + "step": 1038, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008687845431268215, + "timestamp": "2025-09-10 02:18:13.371045", + "step": 1039, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:13.402670", + "step": 1039, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023219764232635498, + "timestamp": "2025-09-10 02:18:13.435613", + "step": 1040, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:18:13.469380", + "step": 1040, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007147731725126505, + "timestamp": "2025-09-10 02:18:13.482656", + "step": 1041, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:13.515176", + "step": 1041, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025176668539643288, + "timestamp": "2025-09-10 02:18:13.525281", + "step": 1042, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:13.556759", + "step": 1042, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030310701578855515, + "timestamp": "2025-09-10 02:18:13.564096", + "step": 1043, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:13.595495", + "step": 1043, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004898954648524523, + "timestamp": "2025-09-10 02:18:13.623811", + "step": 1044, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:13.654800", + "step": 1044, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0060010491870343685, + "timestamp": "2025-09-10 02:18:13.664468", + "step": 1045, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:18:13.704827", + "step": 1045, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00663131894543767, + "timestamp": "2025-09-10 02:18:13.721022", + "step": 1046, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:13.752850", + "step": 1046, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004566808696836233, + "timestamp": "2025-09-10 02:18:13.760384", + "step": 1047, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:13.791912", + "step": 1047, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034333836287260056, + "timestamp": "2025-09-10 02:18:13.819760", + "step": 1048, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:13.853412", + "step": 1048, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02081063576042652, + "timestamp": "2025-09-10 02:18:13.863158", + "step": 1049, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:13.895760", + "step": 1049, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02265256643295288, + "timestamp": "2025-09-10 02:18:13.903213", + "step": 1050, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:13.936643", + "step": 1050, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023109683766961098, + "timestamp": "2025-09-10 02:18:13.946822", + "step": 1051, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:13.981992", + "step": 1051, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008987885899841785, + "timestamp": "2025-09-10 02:18:14.016298", + "step": 1052, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:14.049436", + "step": 1052, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015795622020959854, + "timestamp": "2025-09-10 02:18:14.051658", + "step": 1053, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:14.086452", + "step": 1053, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0090614790096879, + "timestamp": "2025-09-10 02:18:14.099780", + "step": 1054, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:14.131728", + "step": 1054, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0187073964625597, + "timestamp": "2025-09-10 02:18:14.141814", + "step": 1055, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:14.174157", + "step": 1055, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02702743373811245, + "timestamp": "2025-09-10 02:18:14.202776", + "step": 1056, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:14.233455", + "step": 1056, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02460920810699463, + "timestamp": "2025-09-10 02:18:14.239029", + "step": 1057, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:14.270798", + "step": 1057, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009660584852099419, + "timestamp": "2025-09-10 02:18:14.278697", + "step": 1058, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:14.310361", + "step": 1058, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020776310935616493, + "timestamp": "2025-09-10 02:18:14.318047", + "step": 1059, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:14.349936", + "step": 1059, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02294449508190155, + "timestamp": "2025-09-10 02:18:14.378601", + "step": 1060, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:14.410728", + "step": 1060, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017590373754501343, + "timestamp": "2025-09-10 02:18:14.413245", + "step": 1061, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:18:14.448846", + "step": 1061, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006449908018112183, + "timestamp": "2025-09-10 02:18:14.462548", + "step": 1062, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:14.494747", + "step": 1062, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03565583750605583, + "timestamp": "2025-09-10 02:18:14.505002", + "step": 1063, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:18:14.547850", + "step": 1063, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0327337309718132, + "timestamp": "2025-09-10 02:18:14.586097", + "step": 1064, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:14.617996", + "step": 1064, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02565723843872547, + "timestamp": "2025-09-10 02:18:14.622747", + "step": 1065, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:14.654346", + "step": 1065, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009299799799919128, + "timestamp": "2025-09-10 02:18:14.661114", + "step": 1066, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:14.692825", + "step": 1066, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014539425261318684, + "timestamp": "2025-09-10 02:18:14.699563", + "step": 1067, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:14.731919", + "step": 1067, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0058663212694227695, + "timestamp": "2025-09-10 02:18:14.762906", + "step": 1068, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:14.797242", + "step": 1068, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013086764141917229, + "timestamp": "2025-09-10 02:18:14.802463", + "step": 1069, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:14.833918", + "step": 1069, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006901100277900696, + "timestamp": "2025-09-10 02:18:14.841018", + "step": 1070, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:14.871397", + "step": 1070, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03987196460366249, + "timestamp": "2025-09-10 02:18:14.881603", + "step": 1071, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:14.913411", + "step": 1071, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019523393362760544, + "timestamp": "2025-09-10 02:18:14.941939", + "step": 1072, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:14.972877", + "step": 1072, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008477416820824146, + "timestamp": "2025-09-10 02:18:14.978088", + "step": 1073, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:15.009439", + "step": 1073, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00965914037078619, + "timestamp": "2025-09-10 02:18:15.016235", + "step": 1074, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:15.047147", + "step": 1074, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030519306659698486, + "timestamp": "2025-09-10 02:18:15.054225", + "step": 1075, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:15.084514", + "step": 1075, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006563273724168539, + "timestamp": "2025-09-10 02:18:15.109753", + "step": 1076, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:15.140018", + "step": 1076, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0034011027310043573, + "timestamp": "2025-09-10 02:18:15.142293", + "step": 1077, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:15.173398", + "step": 1077, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00994145218282938, + "timestamp": "2025-09-10 02:18:15.180346", + "step": 1078, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:15.211068", + "step": 1078, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00793201569467783, + "timestamp": "2025-09-10 02:18:15.218799", + "step": 1079, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:15.249649", + "step": 1079, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006165057886391878, + "timestamp": "2025-09-10 02:18:15.278386", + "step": 1080, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:15.309899", + "step": 1080, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017197439447045326, + "timestamp": "2025-09-10 02:18:15.314546", + "step": 1081, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:18:15.348968", + "step": 1081, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011048262938857079, + "timestamp": "2025-09-10 02:18:15.362821", + "step": 1082, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:15.395062", + "step": 1082, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016790146008133888, + "timestamp": "2025-09-10 02:18:15.402803", + "step": 1083, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:15.434194", + "step": 1083, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013046172447502613, + "timestamp": "2025-09-10 02:18:15.462016", + "step": 1084, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:15.493866", + "step": 1084, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015779945999383926, + "timestamp": "2025-09-10 02:18:15.501637", + "step": 1085, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:15.533252", + "step": 1085, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021666022017598152, + "timestamp": "2025-09-10 02:18:15.540188", + "step": 1086, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:15.571241", + "step": 1086, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007535271812230349, + "timestamp": "2025-09-10 02:18:15.581785", + "step": 1087, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:15.614685", + "step": 1087, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012627107091248035, + "timestamp": "2025-09-10 02:18:15.645687", + "step": 1088, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:15.676355", + "step": 1088, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021864308044314384, + "timestamp": "2025-09-10 02:18:15.681456", + "step": 1089, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:15.713779", + "step": 1089, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008793273940682411, + "timestamp": "2025-09-10 02:18:15.724719", + "step": 1090, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:15.756581", + "step": 1090, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0030675516463816166, + "timestamp": "2025-09-10 02:18:15.763522", + "step": 1091, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:15.794568", + "step": 1091, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009418687783181667, + "timestamp": "2025-09-10 02:18:15.826297", + "step": 1092, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:15.859361", + "step": 1092, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005299612879753113, + "timestamp": "2025-09-10 02:18:15.869132", + "step": 1093, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:15.899394", + "step": 1093, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018485212698578835, + "timestamp": "2025-09-10 02:18:15.906197", + "step": 1094, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:15.936910", + "step": 1094, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019424965605139732, + "timestamp": "2025-09-10 02:18:15.947055", + "step": 1095, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:15.978515", + "step": 1095, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012880226597189903, + "timestamp": "2025-09-10 02:18:16.003377", + "step": 1096, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:16.034727", + "step": 1096, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007060025352984667, + "timestamp": "2025-09-10 02:18:16.038943", + "step": 1097, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:16.069518", + "step": 1097, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0054059443064033985, + "timestamp": "2025-09-10 02:18:16.082033", + "step": 1098, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:18:16.122331", + "step": 1098, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018658744171261787, + "timestamp": "2025-09-10 02:18:16.138003", + "step": 1099, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:16.168621", + "step": 1099, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003897774498909712, + "timestamp": "2025-09-10 02:18:16.193726", + "step": 1100, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:16.224671", + "step": 1100, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009347101673483849, + "timestamp": "2025-09-10 02:18:16.230213", + "step": 1101, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:16.260845", + "step": 1101, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03616241365671158, + "timestamp": "2025-09-10 02:18:16.264931", + "step": 1102, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:16.298967", + "step": 1102, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006681836675852537, + "timestamp": "2025-09-10 02:18:16.312335", + "step": 1103, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:16.343253", + "step": 1103, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008854770101606846, + "timestamp": "2025-09-10 02:18:16.374312", + "step": 1104, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:16.405424", + "step": 1104, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004546549171209335, + "timestamp": "2025-09-10 02:18:16.407728", + "step": 1105, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:16.437927", + "step": 1105, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010041974484920502, + "timestamp": "2025-09-10 02:18:16.440422", + "step": 1106, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:18:16.474803", + "step": 1106, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0157835241407156, + "timestamp": "2025-09-10 02:18:16.488466", + "step": 1107, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:16.520358", + "step": 1107, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0020716842263936996, + "timestamp": "2025-09-10 02:18:16.553346", + "step": 1108, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:16.584568", + "step": 1108, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029057949781417847, + "timestamp": "2025-09-10 02:18:16.588795", + "step": 1109, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:16.621696", + "step": 1109, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02135612629354, + "timestamp": "2025-09-10 02:18:16.629582", + "step": 1110, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:16.660800", + "step": 1110, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0028702733106911182, + "timestamp": "2025-09-10 02:18:16.665292", + "step": 1111, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:16.695943", + "step": 1111, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013597295619547367, + "timestamp": "2025-09-10 02:18:16.720928", + "step": 1112, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:16.751987", + "step": 1112, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0027478632982820272, + "timestamp": "2025-09-10 02:18:16.756533", + "step": 1113, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:16.787835", + "step": 1113, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0023722779005765915, + "timestamp": "2025-09-10 02:18:16.794785", + "step": 1114, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:16.826397", + "step": 1114, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014487197622656822, + "timestamp": "2025-09-10 02:18:16.833719", + "step": 1115, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:16.866648", + "step": 1115, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015417618677020073, + "timestamp": "2025-09-10 02:18:16.895241", + "step": 1116, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:16.927334", + "step": 1116, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005695224739611149, + "timestamp": "2025-09-10 02:18:16.929586", + "step": 1117, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:16.959925", + "step": 1117, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04347721487283707, + "timestamp": "2025-09-10 02:18:16.964520", + "step": 1118, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:16.998114", + "step": 1118, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010189319029450417, + "timestamp": "2025-09-10 02:18:17.002800", + "step": 1119, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:17.034445", + "step": 1119, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020940367132425308, + "timestamp": "2025-09-10 02:18:17.063030", + "step": 1120, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:17.094110", + "step": 1120, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004970818292349577, + "timestamp": "2025-09-10 02:18:17.098676", + "step": 1121, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:17.131615", + "step": 1121, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004925449378788471, + "timestamp": "2025-09-10 02:18:17.135945", + "step": 1122, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:17.167238", + "step": 1122, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0076041617430746555, + "timestamp": "2025-09-10 02:18:17.174914", + "step": 1123, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:17.205923", + "step": 1123, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004425295628607273, + "timestamp": "2025-09-10 02:18:17.234476", + "step": 1124, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:17.266629", + "step": 1124, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022850140929222107, + "timestamp": "2025-09-10 02:18:17.279343", + "step": 1125, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:17.310177", + "step": 1125, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008794148452579975, + "timestamp": "2025-09-10 02:18:17.317476", + "step": 1126, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:17.347814", + "step": 1126, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02646883763372898, + "timestamp": "2025-09-10 02:18:17.351956", + "step": 1127, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:17.382198", + "step": 1127, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015476626344025135, + "timestamp": "2025-09-10 02:18:17.410749", + "step": 1128, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:17.443000", + "step": 1128, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0023650110233575106, + "timestamp": "2025-09-10 02:18:17.455617", + "step": 1129, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:17.487098", + "step": 1129, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0014028213918209076, + "timestamp": "2025-09-10 02:18:17.494611", + "step": 1130, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:17.525210", + "step": 1130, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006255102809518576, + "timestamp": "2025-09-10 02:18:17.535283", + "step": 1131, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:17.566091", + "step": 1131, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007090611848980188, + "timestamp": "2025-09-10 02:18:17.591020", + "step": 1132, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:17.622925", + "step": 1132, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015480038709938526, + "timestamp": "2025-09-10 02:18:17.627184", + "step": 1133, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:17.658081", + "step": 1133, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004326352383941412, + "timestamp": "2025-09-10 02:18:17.665220", + "step": 1134, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:17.696819", + "step": 1134, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02047579549252987, + "timestamp": "2025-09-10 02:18:17.703921", + "step": 1135, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:17.736305", + "step": 1135, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0026631599757820368, + "timestamp": "2025-09-10 02:18:17.764785", + "step": 1136, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:18:17.800907", + "step": 1136, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013182473368942738, + "timestamp": "2025-09-10 02:18:17.816089", + "step": 1137, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:17.847937", + "step": 1137, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010062654502689838, + "timestamp": "2025-09-10 02:18:17.852496", + "step": 1138, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:18:17.886744", + "step": 1138, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003278909483924508, + "timestamp": "2025-09-10 02:18:17.900412", + "step": 1139, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:17.930709", + "step": 1139, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0194843877106905, + "timestamp": "2025-09-10 02:18:17.954539", + "step": 1140, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:17.986665", + "step": 1140, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002890173811465502, + "timestamp": "2025-09-10 02:18:17.992197", + "step": 1141, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:18.024715", + "step": 1141, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004366376902908087, + "timestamp": "2025-09-10 02:18:18.035599", + "step": 1142, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:18.066749", + "step": 1142, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005158752668648958, + "timestamp": "2025-09-10 02:18:18.074300", + "step": 1143, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:18.108596", + "step": 1143, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009630167856812477, + "timestamp": "2025-09-10 02:18:18.137319", + "step": 1144, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:18.183432", + "step": 1144, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012025467120110989, + "timestamp": "2025-09-10 02:18:18.187852", + "step": 1145, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:18.219751", + "step": 1145, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0020432344172149897, + "timestamp": "2025-09-10 02:18:18.222161", + "step": 1146, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:18.253028", + "step": 1146, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006666641216725111, + "timestamp": "2025-09-10 02:18:18.260029", + "step": 1147, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:18.290912", + "step": 1147, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007748906966298819, + "timestamp": "2025-09-10 02:18:18.316141", + "step": 1148, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:18.348874", + "step": 1148, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02841871976852417, + "timestamp": "2025-09-10 02:18:18.354345", + "step": 1149, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:18.385509", + "step": 1149, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0032413543667644262, + "timestamp": "2025-09-10 02:18:18.393360", + "step": 1150, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:18.423775", + "step": 1150, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010790413245558739, + "timestamp": "2025-09-10 02:18:18.431200", + "step": 1151, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:18.461383", + "step": 1151, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017973562702536583, + "timestamp": "2025-09-10 02:18:18.494542", + "step": 1152, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:18.525330", + "step": 1152, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002686847234144807, + "timestamp": "2025-09-10 02:18:18.529764", + "step": 1153, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:18.562080", + "step": 1153, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002674214309081435, + "timestamp": "2025-09-10 02:18:18.567873", + "step": 1154, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:18.599988", + "step": 1154, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0012537644943222404, + "timestamp": "2025-09-10 02:18:18.608841", + "step": 1155, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:18.642028", + "step": 1155, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011615641415119171, + "timestamp": "2025-09-10 02:18:18.669363", + "step": 1156, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:18.701750", + "step": 1156, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01832910068333149, + "timestamp": "2025-09-10 02:18:18.703887", + "step": 1157, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:18.735275", + "step": 1157, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006739361677318811, + "timestamp": "2025-09-10 02:18:18.741931", + "step": 1158, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:18.772936", + "step": 1158, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01089425478130579, + "timestamp": "2025-09-10 02:18:18.779689", + "step": 1159, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:18.810853", + "step": 1159, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00913853757083416, + "timestamp": "2025-09-10 02:18:18.838352", + "step": 1160, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:18.874197", + "step": 1160, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014518055133521557, + "timestamp": "2025-09-10 02:18:18.881380", + "step": 1161, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:18.913054", + "step": 1161, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006596317049115896, + "timestamp": "2025-09-10 02:18:18.920645", + "step": 1162, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:18.952024", + "step": 1162, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014293434098362923, + "timestamp": "2025-09-10 02:18:18.959596", + "step": 1163, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:18.991733", + "step": 1163, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010215037502348423, + "timestamp": "2025-09-10 02:18:19.016672", + "step": 1164, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:19.048796", + "step": 1164, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030613288283348083, + "timestamp": "2025-09-10 02:18:19.053700", + "step": 1165, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:19.085341", + "step": 1165, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0022717637475579977, + "timestamp": "2025-09-10 02:18:19.089396", + "step": 1166, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:19.120100", + "step": 1166, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009593302384018898, + "timestamp": "2025-09-10 02:18:19.127529", + "step": 1167, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:19.159440", + "step": 1167, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006897877436131239, + "timestamp": "2025-09-10 02:18:19.190357", + "step": 1168, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:18:19.227629", + "step": 1168, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.06167227774858475, + "timestamp": "2025-09-10 02:18:19.242984", + "step": 1169, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:19.274562", + "step": 1169, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04564559459686279, + "timestamp": "2025-09-10 02:18:19.278259", + "step": 1170, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:19.310480", + "step": 1170, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0009320880053564906, + "timestamp": "2025-09-10 02:18:19.317812", + "step": 1171, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:19.350416", + "step": 1171, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002427774015814066, + "timestamp": "2025-09-10 02:18:19.382777", + "step": 1172, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:19.416252", + "step": 1172, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010821384377777576, + "timestamp": "2025-09-10 02:18:19.424201", + "step": 1173, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:19.458546", + "step": 1173, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025808248668909073, + "timestamp": "2025-09-10 02:18:19.462683", + "step": 1174, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:18:19.504314", + "step": 1174, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.061185259371995926, + "timestamp": "2025-09-10 02:18:19.520458", + "step": 1175, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:19.551166", + "step": 1175, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0003842521400656551, + "timestamp": "2025-09-10 02:18:19.582991", + "step": 1176, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:18:29.776063", + "step": 1176, + "epoch": 1 + }, + { + "type": "pplx", + "content": 16784163.124731667, + "timestamp": "2025-09-10 02:18:29.779308", + "step": 1176, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:29.810633", + "step": 1176, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017028305679559708, + "timestamp": "2025-09-10 02:18:29.818914", + "step": 1177, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:29.850203", + "step": 1177, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03579110652208328, + "timestamp": "2025-09-10 02:18:29.854250", + "step": 1178, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:29.885761", + "step": 1178, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012345547787845135, + "timestamp": "2025-09-10 02:18:29.893048", + "step": 1179, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:18:29.923823", + "step": 1179, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014899312518537045, + "timestamp": "2025-09-10 02:18:29.947959", + "step": 1180, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:29.978721", + "step": 1180, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008222085423767567, + "timestamp": "2025-09-10 02:18:29.980848", + "step": 1181, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 15187581968384 + }, + "timestamp": "2025-09-10 02:18:30.023345", + "step": 1181, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018380869878455997, + "timestamp": "2025-09-10 02:18:30.041100", + "step": 1182, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:30.072675", + "step": 1182, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01529020071029663, + "timestamp": "2025-09-10 02:18:30.083507", + "step": 1183, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:30.115357", + "step": 1183, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0014780040364712477, + "timestamp": "2025-09-10 02:18:30.139939", + "step": 1184, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:30.171392", + "step": 1184, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05387040600180626, + "timestamp": "2025-09-10 02:18:30.175793", + "step": 1185, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:30.206809", + "step": 1185, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022361472249031067, + "timestamp": "2025-09-10 02:18:30.213526", + "step": 1186, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:30.246344", + "step": 1186, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004187957849353552, + "timestamp": "2025-09-10 02:18:30.257187", + "step": 1187, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:30.288529", + "step": 1187, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00469655217602849, + "timestamp": "2025-09-10 02:18:30.311975", + "step": 1188, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:30.343483", + "step": 1188, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013641082681715488, + "timestamp": "2025-09-10 02:18:30.347869", + "step": 1189, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:30.379141", + "step": 1189, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024424701929092407, + "timestamp": "2025-09-10 02:18:30.386124", + "step": 1190, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:30.417664", + "step": 1190, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01394572388380766, + "timestamp": "2025-09-10 02:18:30.429702", + "step": 1191, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:30.460633", + "step": 1191, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011202634312212467, + "timestamp": "2025-09-10 02:18:30.488313", + "step": 1192, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:30.520611", + "step": 1192, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00934526789933443, + "timestamp": "2025-09-10 02:18:30.533275", + "step": 1193, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:30.564516", + "step": 1193, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013034219853579998, + "timestamp": "2025-09-10 02:18:30.572148", + "step": 1194, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 624 + ], + "flops": 18509808050496 + }, + "timestamp": "2025-09-10 02:18:30.626335", + "step": 1194, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016260338947176933, + "timestamp": "2025-09-10 02:18:30.648043", + "step": 1195, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:30.679582", + "step": 1195, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01205496210604906, + "timestamp": "2025-09-10 02:18:30.711057", + "step": 1196, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:30.742251", + "step": 1196, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008653457276523113, + "timestamp": "2025-09-10 02:18:30.747482", + "step": 1197, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:18:30.785544", + "step": 1197, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017093000933527946, + "timestamp": "2025-09-10 02:18:30.801131", + "step": 1198, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:30.833495", + "step": 1198, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004972951021045446, + "timestamp": "2025-09-10 02:18:30.840111", + "step": 1199, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:30.871337", + "step": 1199, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027272850275039673, + "timestamp": "2025-09-10 02:18:30.896443", + "step": 1200, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:30.929351", + "step": 1200, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011625121347606182, + "timestamp": "2025-09-10 02:18:30.942023", + "step": 1201, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:18:30.983449", + "step": 1201, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03317030146718025, + "timestamp": "2025-09-10 02:18:31.000458", + "step": 1202, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:18:31.036444", + "step": 1202, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002671575639396906, + "timestamp": "2025-09-10 02:18:31.050463", + "step": 1203, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:31.083564", + "step": 1203, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009755785576999187, + "timestamp": "2025-09-10 02:18:31.111385", + "step": 1204, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:31.149839", + "step": 1204, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03215007483959198, + "timestamp": "2025-09-10 02:18:31.159052", + "step": 1205, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:31.196525", + "step": 1205, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010887703858315945, + "timestamp": "2025-09-10 02:18:31.207286", + "step": 1206, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:31.251369", + "step": 1206, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020919183269143105, + "timestamp": "2025-09-10 02:18:31.261938", + "step": 1207, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:31.299237", + "step": 1207, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04709470644593239, + "timestamp": "2025-09-10 02:18:31.332133", + "step": 1208, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:31.374807", + "step": 1208, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01061093620955944, + "timestamp": "2025-09-10 02:18:31.378490", + "step": 1209, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:31.415608", + "step": 1209, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029514219611883163, + "timestamp": "2025-09-10 02:18:31.422753", + "step": 1210, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:31.455106", + "step": 1210, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012318803928792477, + "timestamp": "2025-09-10 02:18:31.465102", + "step": 1211, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:31.497093", + "step": 1211, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018550723791122437, + "timestamp": "2025-09-10 02:18:31.524402", + "step": 1212, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:31.557803", + "step": 1212, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008835774846374989, + "timestamp": "2025-09-10 02:18:31.562204", + "step": 1213, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:31.595282", + "step": 1213, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01745608262717724, + "timestamp": "2025-09-10 02:18:31.601476", + "step": 1214, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:31.633237", + "step": 1214, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011802353896200657, + "timestamp": "2025-09-10 02:18:31.642797", + "step": 1215, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:31.674542", + "step": 1215, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01431284286081791, + "timestamp": "2025-09-10 02:18:31.705475", + "step": 1216, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:31.737086", + "step": 1216, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025099601596593857, + "timestamp": "2025-09-10 02:18:31.739302", + "step": 1217, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:31.770797", + "step": 1217, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02076675556600094, + "timestamp": "2025-09-10 02:18:31.778171", + "step": 1218, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:31.809728", + "step": 1218, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01054247748106718, + "timestamp": "2025-09-10 02:18:31.817146", + "step": 1219, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:31.848780", + "step": 1219, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00928487628698349, + "timestamp": "2025-09-10 02:18:31.877107", + "step": 1220, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:31.907984", + "step": 1220, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022254247218370438, + "timestamp": "2025-09-10 02:18:31.910448", + "step": 1221, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:31.941402", + "step": 1221, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011745232157409191, + "timestamp": "2025-09-10 02:18:31.945587", + "step": 1222, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:31.976636", + "step": 1222, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02063934877514839, + "timestamp": "2025-09-10 02:18:31.984337", + "step": 1223, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:32.016405", + "step": 1223, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.039252448827028275, + "timestamp": "2025-09-10 02:18:32.049530", + "step": 1224, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:32.082554", + "step": 1224, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012943130917847157, + "timestamp": "2025-09-10 02:18:32.084596", + "step": 1225, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:32.116154", + "step": 1225, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02003873698413372, + "timestamp": "2025-09-10 02:18:32.128135", + "step": 1226, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:32.159589", + "step": 1226, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02976025640964508, + "timestamp": "2025-09-10 02:18:32.164024", + "step": 1227, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:32.194749", + "step": 1227, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012167098931968212, + "timestamp": "2025-09-10 02:18:32.220046", + "step": 1228, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:32.251374", + "step": 1228, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021341700106859207, + "timestamp": "2025-09-10 02:18:32.259213", + "step": 1229, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:32.289904", + "step": 1229, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005283652804791927, + "timestamp": "2025-09-10 02:18:32.297233", + "step": 1230, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:32.329083", + "step": 1230, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01109201367944479, + "timestamp": "2025-09-10 02:18:32.336101", + "step": 1231, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:32.367104", + "step": 1231, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023132245987653732, + "timestamp": "2025-09-10 02:18:32.400218", + "step": 1232, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:32.430711", + "step": 1232, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010825731791555882, + "timestamp": "2025-09-10 02:18:32.435551", + "step": 1233, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:32.466094", + "step": 1233, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02231140062212944, + "timestamp": "2025-09-10 02:18:32.470249", + "step": 1234, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:32.502763", + "step": 1234, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029144972562789917, + "timestamp": "2025-09-10 02:18:32.508228", + "step": 1235, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:32.539563", + "step": 1235, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005374426953494549, + "timestamp": "2025-09-10 02:18:32.567470", + "step": 1236, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:32.605255", + "step": 1236, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02825375273823738, + "timestamp": "2025-09-10 02:18:32.612149", + "step": 1237, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:32.645025", + "step": 1237, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020113468170166016, + "timestamp": "2025-09-10 02:18:32.657017", + "step": 1238, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:32.690869", + "step": 1238, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018397843465209007, + "timestamp": "2025-09-10 02:18:32.701867", + "step": 1239, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:32.735593", + "step": 1239, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009672732092440128, + "timestamp": "2025-09-10 02:18:32.763428", + "step": 1240, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:32.794477", + "step": 1240, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015301401726901531, + "timestamp": "2025-09-10 02:18:32.799729", + "step": 1241, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:32.832384", + "step": 1241, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003928063903003931, + "timestamp": "2025-09-10 02:18:32.839501", + "step": 1242, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:32.871701", + "step": 1242, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011773375794291496, + "timestamp": "2025-09-10 02:18:32.878621", + "step": 1243, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:32.918003", + "step": 1243, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038322594482451677, + "timestamp": "2025-09-10 02:18:32.946709", + "step": 1244, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:32.978578", + "step": 1244, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.049418624490499496, + "timestamp": "2025-09-10 02:18:32.983159", + "step": 1245, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:33.013761", + "step": 1245, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026037881150841713, + "timestamp": "2025-09-10 02:18:33.020631", + "step": 1246, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:33.051791", + "step": 1246, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028841393068432808, + "timestamp": "2025-09-10 02:18:33.058811", + "step": 1247, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:33.091678", + "step": 1247, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00434449827298522, + "timestamp": "2025-09-10 02:18:33.120341", + "step": 1248, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:33.151789", + "step": 1248, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0594901405274868, + "timestamp": "2025-09-10 02:18:33.154354", + "step": 1249, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:18:33.193377", + "step": 1249, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02595318667590618, + "timestamp": "2025-09-10 02:18:33.209070", + "step": 1250, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:33.241081", + "step": 1250, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009637218900024891, + "timestamp": "2025-09-10 02:18:33.251101", + "step": 1251, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:33.282406", + "step": 1251, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020436033606529236, + "timestamp": "2025-09-10 02:18:33.310081", + "step": 1252, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:33.342006", + "step": 1252, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014274738729000092, + "timestamp": "2025-09-10 02:18:33.351794", + "step": 1253, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:33.383906", + "step": 1253, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01211103331297636, + "timestamp": "2025-09-10 02:18:33.393822", + "step": 1254, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:33.427460", + "step": 1254, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0014368664706125855, + "timestamp": "2025-09-10 02:18:33.434463", + "step": 1255, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:33.465462", + "step": 1255, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024746278300881386, + "timestamp": "2025-09-10 02:18:33.493327", + "step": 1256, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:33.524549", + "step": 1256, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008130094036459923, + "timestamp": "2025-09-10 02:18:33.529833", + "step": 1257, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:33.561516", + "step": 1257, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024972526356577873, + "timestamp": "2025-09-10 02:18:33.565954", + "step": 1258, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:33.596831", + "step": 1258, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027556994929909706, + "timestamp": "2025-09-10 02:18:33.601277", + "step": 1259, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:33.632346", + "step": 1259, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018490461632609367, + "timestamp": "2025-09-10 02:18:33.661045", + "step": 1260, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:33.695309", + "step": 1260, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009816362522542477, + "timestamp": "2025-09-10 02:18:33.708052", + "step": 1261, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:18:33.750489", + "step": 1261, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016465116292238235, + "timestamp": "2025-09-10 02:18:33.766332", + "step": 1262, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:33.799401", + "step": 1262, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001814844785258174, + "timestamp": "2025-09-10 02:18:33.810308", + "step": 1263, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:33.845201", + "step": 1263, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013563080690801144, + "timestamp": "2025-09-10 02:18:33.873063", + "step": 1264, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:33.908938", + "step": 1264, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016335798427462578, + "timestamp": "2025-09-10 02:18:33.913348", + "step": 1265, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:33.946699", + "step": 1265, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01673940010368824, + "timestamp": "2025-09-10 02:18:33.957355", + "step": 1266, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:33.991526", + "step": 1266, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033870987594127655, + "timestamp": "2025-09-10 02:18:34.004095", + "step": 1267, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:34.038372", + "step": 1267, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005386251490563154, + "timestamp": "2025-09-10 02:18:34.069241", + "step": 1268, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:34.107804", + "step": 1268, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030843589454889297, + "timestamp": "2025-09-10 02:18:34.117801", + "step": 1269, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:34.151381", + "step": 1269, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00869796983897686, + "timestamp": "2025-09-10 02:18:34.158066", + "step": 1270, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:34.189862", + "step": 1270, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.046526242047548294, + "timestamp": "2025-09-10 02:18:34.200281", + "step": 1271, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 15187581968384 + }, + "timestamp": "2025-09-10 02:18:34.245614", + "step": 1271, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011616252362728119, + "timestamp": "2025-09-10 02:18:34.284123", + "step": 1272, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:18:34.323074", + "step": 1272, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007240879815071821, + "timestamp": "2025-09-10 02:18:34.338246", + "step": 1273, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:34.372144", + "step": 1273, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010705935768783092, + "timestamp": "2025-09-10 02:18:34.376353", + "step": 1274, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:34.407558", + "step": 1274, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004706122912466526, + "timestamp": "2025-09-10 02:18:34.414541", + "step": 1275, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:34.448868", + "step": 1275, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01904475688934326, + "timestamp": "2025-09-10 02:18:34.473491", + "step": 1276, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:34.509351", + "step": 1276, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004496569279581308, + "timestamp": "2025-09-10 02:18:34.513801", + "step": 1277, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:34.554885", + "step": 1277, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03280925750732422, + "timestamp": "2025-09-10 02:18:34.565678", + "step": 1278, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:34.602218", + "step": 1278, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005936585366725922, + "timestamp": "2025-09-10 02:18:34.609147", + "step": 1279, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:34.640172", + "step": 1279, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007575146853923798, + "timestamp": "2025-09-10 02:18:34.665352", + "step": 1280, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:18:34.696244", + "step": 1280, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012064780108630657, + "timestamp": "2025-09-10 02:18:34.699504", + "step": 1281, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:34.732312", + "step": 1281, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012344618327915668, + "timestamp": "2025-09-10 02:18:34.742146", + "step": 1282, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:18:34.778396", + "step": 1282, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025485141202807426, + "timestamp": "2025-09-10 02:18:34.792184", + "step": 1283, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:34.827800", + "step": 1283, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005437423940747976, + "timestamp": "2025-09-10 02:18:34.855789", + "step": 1284, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:34.887426", + "step": 1284, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023022016510367393, + "timestamp": "2025-09-10 02:18:34.891764", + "step": 1285, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:34.929544", + "step": 1285, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003953091334551573, + "timestamp": "2025-09-10 02:18:34.941312", + "step": 1286, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:34.971935", + "step": 1286, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005520283244550228, + "timestamp": "2025-09-10 02:18:34.983936", + "step": 1287, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:35.017990", + "step": 1287, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014434975571930408, + "timestamp": "2025-09-10 02:18:35.051393", + "step": 1288, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:35.084410", + "step": 1288, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011467205360531807, + "timestamp": "2025-09-10 02:18:35.096911", + "step": 1289, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:35.127795", + "step": 1289, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020779237151145935, + "timestamp": "2025-09-10 02:18:35.134525", + "step": 1290, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:35.168761", + "step": 1290, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018716433551162481, + "timestamp": "2025-09-10 02:18:35.175865", + "step": 1291, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:35.206686", + "step": 1291, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0036745467223227024, + "timestamp": "2025-09-10 02:18:35.231575", + "step": 1292, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:35.262541", + "step": 1292, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016196925193071365, + "timestamp": "2025-09-10 02:18:35.270330", + "step": 1293, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:35.301030", + "step": 1293, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0035647223703563213, + "timestamp": "2025-09-10 02:18:35.311839", + "step": 1294, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:35.343584", + "step": 1294, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010751097463071346, + "timestamp": "2025-09-10 02:18:35.346429", + "step": 1295, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:35.377073", + "step": 1295, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0385432243347168, + "timestamp": "2025-09-10 02:18:35.405578", + "step": 1296, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:18:35.438840", + "step": 1296, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026758210733532906, + "timestamp": "2025-09-10 02:18:35.451961", + "step": 1297, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:35.484987", + "step": 1297, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01798836700618267, + "timestamp": "2025-09-10 02:18:35.495753", + "step": 1298, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:35.529550", + "step": 1298, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01102465484291315, + "timestamp": "2025-09-10 02:18:35.533877", + "step": 1299, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:35.564153", + "step": 1299, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011853739619255066, + "timestamp": "2025-09-10 02:18:35.589056", + "step": 1300, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:35.620634", + "step": 1300, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03611797094345093, + "timestamp": "2025-09-10 02:18:35.630245", + "step": 1301, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:18:35.670917", + "step": 1301, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0027977568097412586, + "timestamp": "2025-09-10 02:18:35.688020", + "step": 1302, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:35.719854", + "step": 1302, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009604268707334995, + "timestamp": "2025-09-10 02:18:35.726938", + "step": 1303, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:35.758003", + "step": 1303, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002295356709510088, + "timestamp": "2025-09-10 02:18:35.785829", + "step": 1304, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:18:35.822533", + "step": 1304, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008270000107586384, + "timestamp": "2025-09-10 02:18:35.837973", + "step": 1305, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:18:35.872182", + "step": 1305, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01986978016793728, + "timestamp": "2025-09-10 02:18:35.875338", + "step": 1306, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:35.911087", + "step": 1306, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023896988481283188, + "timestamp": "2025-09-10 02:18:35.921057", + "step": 1307, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:35.957355", + "step": 1307, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020381931215524673, + "timestamp": "2025-09-10 02:18:35.991669", + "step": 1308, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:36.026271", + "step": 1308, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014767967164516449, + "timestamp": "2025-09-10 02:18:36.031507", + "step": 1309, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:36.065762", + "step": 1309, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002422439632937312, + "timestamp": "2025-09-10 02:18:36.070149", + "step": 1310, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:36.100810", + "step": 1310, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024649931117892265, + "timestamp": "2025-09-10 02:18:36.108445", + "step": 1311, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:36.141430", + "step": 1311, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009912949986755848, + "timestamp": "2025-09-10 02:18:36.172465", + "step": 1312, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:36.203763", + "step": 1312, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002744142198935151, + "timestamp": "2025-09-10 02:18:36.212204", + "step": 1313, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:36.243525", + "step": 1313, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007335959933698177, + "timestamp": "2025-09-10 02:18:36.255726", + "step": 1314, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:36.287489", + "step": 1314, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.041926268488168716, + "timestamp": "2025-09-10 02:18:36.294324", + "step": 1315, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:36.326188", + "step": 1315, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0036907510366290808, + "timestamp": "2025-09-10 02:18:36.357722", + "step": 1316, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:18:36.391305", + "step": 1316, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00216344790533185, + "timestamp": "2025-09-10 02:18:36.404473", + "step": 1317, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:18:36.445720", + "step": 1317, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009386607445776463, + "timestamp": "2025-09-10 02:18:36.461893", + "step": 1318, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:36.494105", + "step": 1318, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030109494924545288, + "timestamp": "2025-09-10 02:18:36.501386", + "step": 1319, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:36.533395", + "step": 1319, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0012107326183468103, + "timestamp": "2025-09-10 02:18:36.566829", + "step": 1320, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:36.598400", + "step": 1320, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008304606191813946, + "timestamp": "2025-09-10 02:18:36.602975", + "step": 1321, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:36.634882", + "step": 1321, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022128764539957047, + "timestamp": "2025-09-10 02:18:36.642598", + "step": 1322, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:36.673862", + "step": 1322, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009982970543205738, + "timestamp": "2025-09-10 02:18:36.686427", + "step": 1323, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:18:46.851890", + "step": 1323, + "epoch": 1 + }, + { + "type": "pplx", + "content": 13954997.402758988, + "timestamp": "2025-09-10 02:18:46.854605", + "step": 1323, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:46.886451", + "step": 1323, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018629444763064384, + "timestamp": "2025-09-10 02:18:46.920587", + "step": 1324, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:46.952098", + "step": 1324, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015395854599773884, + "timestamp": "2025-09-10 02:18:46.960873", + "step": 1325, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:46.992652", + "step": 1325, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0029722540639340878, + "timestamp": "2025-09-10 02:18:47.002965", + "step": 1326, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:47.036788", + "step": 1326, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0171508826315403, + "timestamp": "2025-09-10 02:18:47.050107", + "step": 1327, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:47.081821", + "step": 1327, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004396271891891956, + "timestamp": "2025-09-10 02:18:47.110082", + "step": 1328, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:47.141128", + "step": 1328, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03813646361231804, + "timestamp": "2025-09-10 02:18:47.145678", + "step": 1329, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:47.177598", + "step": 1329, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005988952703773975, + "timestamp": "2025-09-10 02:18:47.189873", + "step": 1330, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:47.224977", + "step": 1330, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03351500257849693, + "timestamp": "2025-09-10 02:18:47.238370", + "step": 1331, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:47.271603", + "step": 1331, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003736252663657069, + "timestamp": "2025-09-10 02:18:47.296535", + "step": 1332, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:47.328532", + "step": 1332, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03064594976603985, + "timestamp": "2025-09-10 02:18:47.337440", + "step": 1333, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:47.369734", + "step": 1333, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02222239412367344, + "timestamp": "2025-09-10 02:18:47.380141", + "step": 1334, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:18:47.416111", + "step": 1334, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026622384786605835, + "timestamp": "2025-09-10 02:18:47.429721", + "step": 1335, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:47.464492", + "step": 1335, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008432361297309399, + "timestamp": "2025-09-10 02:18:47.498715", + "step": 1336, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:47.531910", + "step": 1336, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004825720097869635, + "timestamp": "2025-09-10 02:18:47.534233", + "step": 1337, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:47.565732", + "step": 1337, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004051771480590105, + "timestamp": "2025-09-10 02:18:47.572231", + "step": 1338, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:47.603586", + "step": 1338, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020291676744818687, + "timestamp": "2025-09-10 02:18:47.610210", + "step": 1339, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:47.642064", + "step": 1339, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017118671908974648, + "timestamp": "2025-09-10 02:18:47.674586", + "step": 1340, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:47.707026", + "step": 1340, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0032200440764427185, + "timestamp": "2025-09-10 02:18:47.714102", + "step": 1341, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:47.746221", + "step": 1341, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0031123815570026636, + "timestamp": "2025-09-10 02:18:47.755884", + "step": 1342, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:47.787649", + "step": 1342, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007114849053323269, + "timestamp": "2025-09-10 02:18:47.794652", + "step": 1343, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:47.826963", + "step": 1343, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00877163652330637, + "timestamp": "2025-09-10 02:18:47.859375", + "step": 1344, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:47.890078", + "step": 1344, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03030526638031006, + "timestamp": "2025-09-10 02:18:47.892273", + "step": 1345, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:47.925807", + "step": 1345, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005265057552605867, + "timestamp": "2025-09-10 02:18:47.936141", + "step": 1346, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:47.971811", + "step": 1346, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001451778458431363, + "timestamp": "2025-09-10 02:18:47.978351", + "step": 1347, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:48.009497", + "step": 1347, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011003616265952587, + "timestamp": "2025-09-10 02:18:48.037639", + "step": 1348, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:48.070245", + "step": 1348, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00957377441227436, + "timestamp": "2025-09-10 02:18:48.077376", + "step": 1349, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:48.108133", + "step": 1349, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004885104484856129, + "timestamp": "2025-09-10 02:18:48.114983", + "step": 1350, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:48.148190", + "step": 1350, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022131670266389847, + "timestamp": "2025-09-10 02:18:48.160564", + "step": 1351, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:18:48.197590", + "step": 1351, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004956061951816082, + "timestamp": "2025-09-10 02:18:48.232314", + "step": 1352, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:48.265558", + "step": 1352, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018229112029075623, + "timestamp": "2025-09-10 02:18:48.275361", + "step": 1353, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:48.306870", + "step": 1353, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010464141145348549, + "timestamp": "2025-09-10 02:18:48.316533", + "step": 1354, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:48.347729", + "step": 1354, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0036923617590218782, + "timestamp": "2025-09-10 02:18:48.349819", + "step": 1355, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:48.381637", + "step": 1355, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05059584602713585, + "timestamp": "2025-09-10 02:18:48.409372", + "step": 1356, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:18:48.441772", + "step": 1356, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005611395929008722, + "timestamp": "2025-09-10 02:18:48.454840", + "step": 1357, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:48.486090", + "step": 1357, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00173103844281286, + "timestamp": "2025-09-10 02:18:48.489918", + "step": 1358, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:48.521672", + "step": 1358, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00872302707284689, + "timestamp": "2025-09-10 02:18:48.534199", + "step": 1359, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:48.565533", + "step": 1359, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003065047785639763, + "timestamp": "2025-09-10 02:18:48.593194", + "step": 1360, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:48.624533", + "step": 1360, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007230323273688555, + "timestamp": "2025-09-10 02:18:48.631787", + "step": 1361, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:48.663902", + "step": 1361, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012289733625948429, + "timestamp": "2025-09-10 02:18:48.676196", + "step": 1362, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:18:48.711977", + "step": 1362, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05497897043824196, + "timestamp": "2025-09-10 02:18:48.725681", + "step": 1363, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:48.757658", + "step": 1363, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002912584925070405, + "timestamp": "2025-09-10 02:18:48.788064", + "step": 1364, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:18:48.821183", + "step": 1364, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0022109579294919968, + "timestamp": "2025-09-10 02:18:48.834324", + "step": 1365, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:48.866771", + "step": 1365, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018029719591140747, + "timestamp": "2025-09-10 02:18:48.874073", + "step": 1366, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:48.906286", + "step": 1366, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.056414928287267685, + "timestamp": "2025-09-10 02:18:48.912869", + "step": 1367, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:48.945028", + "step": 1367, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05040454491972923, + "timestamp": "2025-09-10 02:18:48.975529", + "step": 1368, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:49.009208", + "step": 1368, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01217829529196024, + "timestamp": "2025-09-10 02:18:49.013682", + "step": 1369, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:49.048154", + "step": 1369, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02744656801223755, + "timestamp": "2025-09-10 02:18:49.061502", + "step": 1370, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:49.096528", + "step": 1370, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02190292812883854, + "timestamp": "2025-09-10 02:18:49.106149", + "step": 1371, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:18:49.140047", + "step": 1371, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03642702102661133, + "timestamp": "2025-09-10 02:18:49.174250", + "step": 1372, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:49.208227", + "step": 1372, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012048700824379921, + "timestamp": "2025-09-10 02:18:49.213934", + "step": 1373, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:49.255170", + "step": 1373, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02002662420272827, + "timestamp": "2025-09-10 02:18:49.262645", + "step": 1374, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:49.295812", + "step": 1374, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012123959138989449, + "timestamp": "2025-09-10 02:18:49.303233", + "step": 1375, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:49.334905", + "step": 1375, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007124970201402903, + "timestamp": "2025-09-10 02:18:49.366230", + "step": 1376, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:49.399376", + "step": 1376, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003386021126061678, + "timestamp": "2025-09-10 02:18:49.401904", + "step": 1377, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:49.434940", + "step": 1377, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0014372080331668258, + "timestamp": "2025-09-10 02:18:49.444371", + "step": 1378, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:49.476646", + "step": 1378, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03508186340332031, + "timestamp": "2025-09-10 02:18:49.482744", + "step": 1379, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 624 + ], + "flops": 18509808050496 + }, + "timestamp": "2025-09-10 02:18:49.535283", + "step": 1379, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011475126259028912, + "timestamp": "2025-09-10 02:18:49.577910", + "step": 1380, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:49.614902", + "step": 1380, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0056556230410933495, + "timestamp": "2025-09-10 02:18:49.620965", + "step": 1381, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:49.657781", + "step": 1381, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008287766017019749, + "timestamp": "2025-09-10 02:18:49.661791", + "step": 1382, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:18:49.704027", + "step": 1382, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02117903158068657, + "timestamp": "2025-09-10 02:18:49.721395", + "step": 1383, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:49.754731", + "step": 1383, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022697385400533676, + "timestamp": "2025-09-10 02:18:49.787804", + "step": 1384, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:49.824331", + "step": 1384, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010887114331126213, + "timestamp": "2025-09-10 02:18:49.828629", + "step": 1385, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:49.861534", + "step": 1385, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012805354781448841, + "timestamp": "2025-09-10 02:18:49.865426", + "step": 1386, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:49.898188", + "step": 1386, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01245130505412817, + "timestamp": "2025-09-10 02:18:49.908448", + "step": 1387, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:49.939966", + "step": 1387, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028233621269464493, + "timestamp": "2025-09-10 02:18:49.964718", + "step": 1388, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:49.997212", + "step": 1388, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02826070412993431, + "timestamp": "2025-09-10 02:18:50.001547", + "step": 1389, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:50.033335", + "step": 1389, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009805792011320591, + "timestamp": "2025-09-10 02:18:50.040028", + "step": 1390, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:50.071252", + "step": 1390, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005362308118492365, + "timestamp": "2025-09-10 02:18:50.078294", + "step": 1391, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:50.111180", + "step": 1391, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013351285830140114, + "timestamp": "2025-09-10 02:18:50.135326", + "step": 1392, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:50.167928", + "step": 1392, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02243475615978241, + "timestamp": "2025-09-10 02:18:50.172362", + "step": 1393, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:50.210986", + "step": 1393, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014227988198399544, + "timestamp": "2025-09-10 02:18:50.223121", + "step": 1394, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:18:50.266730", + "step": 1394, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004071381408721209, + "timestamp": "2025-09-10 02:18:50.282661", + "step": 1395, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:50.314994", + "step": 1395, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008621515706181526, + "timestamp": "2025-09-10 02:18:50.345670", + "step": 1396, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:50.383033", + "step": 1396, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009817084297537804, + "timestamp": "2025-09-10 02:18:50.385956", + "step": 1397, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:50.419718", + "step": 1397, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012866640463471413, + "timestamp": "2025-09-10 02:18:50.426916", + "step": 1398, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:50.462427", + "step": 1398, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004508704878389835, + "timestamp": "2025-09-10 02:18:50.466612", + "step": 1399, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:18:50.499638", + "step": 1399, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01872037909924984, + "timestamp": "2025-09-10 02:18:50.523484", + "step": 1400, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:50.556465", + "step": 1400, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006643320899456739, + "timestamp": "2025-09-10 02:18:50.560693", + "step": 1401, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:50.591810", + "step": 1401, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007569948676973581, + "timestamp": "2025-09-10 02:18:50.599186", + "step": 1402, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:50.630800", + "step": 1402, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025758620351552963, + "timestamp": "2025-09-10 02:18:50.640631", + "step": 1403, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:50.671891", + "step": 1403, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0335959829390049, + "timestamp": "2025-09-10 02:18:50.696605", + "step": 1404, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 18035204324480 + }, + "timestamp": "2025-09-10 02:18:50.745995", + "step": 1404, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030695544555783272, + "timestamp": "2025-09-10 02:18:50.767499", + "step": 1405, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:50.799735", + "step": 1405, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0049163768999278545, + "timestamp": "2025-09-10 02:18:50.804055", + "step": 1406, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:50.836221", + "step": 1406, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010724040679633617, + "timestamp": "2025-09-10 02:18:50.839963", + "step": 1407, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:50.871715", + "step": 1407, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008394693955779076, + "timestamp": "2025-09-10 02:18:50.896887", + "step": 1408, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:50.928718", + "step": 1408, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026338692754507065, + "timestamp": "2025-09-10 02:18:50.933342", + "step": 1409, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:50.967081", + "step": 1409, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016750004142522812, + "timestamp": "2025-09-10 02:18:50.971288", + "step": 1410, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:51.003371", + "step": 1410, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012398646213114262, + "timestamp": "2025-09-10 02:18:51.010725", + "step": 1411, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:51.042668", + "step": 1411, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0241679884493351, + "timestamp": "2025-09-10 02:18:51.067747", + "step": 1412, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:51.100071", + "step": 1412, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02775205485522747, + "timestamp": "2025-09-10 02:18:51.104368", + "step": 1413, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:51.136377", + "step": 1413, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008829674683511257, + "timestamp": "2025-09-10 02:18:51.143509", + "step": 1414, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:51.175624", + "step": 1414, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015450743958353996, + "timestamp": "2025-09-10 02:18:51.182906", + "step": 1415, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:51.215436", + "step": 1415, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006386533845216036, + "timestamp": "2025-09-10 02:18:51.248305", + "step": 1416, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:51.280172", + "step": 1416, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010058706626296043, + "timestamp": "2025-09-10 02:18:51.284942", + "step": 1417, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:51.316164", + "step": 1417, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03240646421909332, + "timestamp": "2025-09-10 02:18:51.322735", + "step": 1418, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:51.354522", + "step": 1418, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009293629787862301, + "timestamp": "2025-09-10 02:18:51.364958", + "step": 1419, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:51.397068", + "step": 1419, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009477603249251842, + "timestamp": "2025-09-10 02:18:51.425134", + "step": 1420, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:51.457027", + "step": 1420, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0042470647022128105, + "timestamp": "2025-09-10 02:18:51.462142", + "step": 1421, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:51.493350", + "step": 1421, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028168709948658943, + "timestamp": "2025-09-10 02:18:51.500444", + "step": 1422, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:51.531912", + "step": 1422, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.054098401218652725, + "timestamp": "2025-09-10 02:18:51.538707", + "step": 1423, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:51.569998", + "step": 1423, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013340512290596962, + "timestamp": "2025-09-10 02:18:51.594750", + "step": 1424, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:51.626137", + "step": 1424, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001269067986868322, + "timestamp": "2025-09-10 02:18:51.631049", + "step": 1425, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:51.661884", + "step": 1425, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01629549451172352, + "timestamp": "2025-09-10 02:18:51.669352", + "step": 1426, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:18:51.700505", + "step": 1426, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013591033406555653, + "timestamp": "2025-09-10 02:18:51.712492", + "step": 1427, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:51.743458", + "step": 1427, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01069872546941042, + "timestamp": "2025-09-10 02:18:51.772110", + "step": 1428, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:51.802797", + "step": 1428, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026248564943671227, + "timestamp": "2025-09-10 02:18:51.807208", + "step": 1429, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:51.838296", + "step": 1429, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013996967114508152, + "timestamp": "2025-09-10 02:18:51.845290", + "step": 1430, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 19459015502528 + }, + "timestamp": "2025-09-10 02:18:51.901325", + "step": 1430, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01444973610341549, + "timestamp": "2025-09-10 02:18:51.924697", + "step": 1431, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:51.957150", + "step": 1431, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01952037401497364, + "timestamp": "2025-09-10 02:18:51.984564", + "step": 1432, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:52.015526", + "step": 1432, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02621072344481945, + "timestamp": "2025-09-10 02:18:52.019985", + "step": 1433, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:52.050722", + "step": 1433, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019877398386597633, + "timestamp": "2025-09-10 02:18:52.055320", + "step": 1434, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:52.086287", + "step": 1434, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008985900320112705, + "timestamp": "2025-09-10 02:18:52.096891", + "step": 1435, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:52.128162", + "step": 1435, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006387191358953714, + "timestamp": "2025-09-10 02:18:52.158997", + "step": 1436, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:52.189148", + "step": 1436, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008200598880648613, + "timestamp": "2025-09-10 02:18:52.192387", + "step": 1437, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:18:52.224886", + "step": 1437, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014574953354895115, + "timestamp": "2025-09-10 02:18:52.228614", + "step": 1438, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:52.265452", + "step": 1438, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010290967300534248, + "timestamp": "2025-09-10 02:18:52.272532", + "step": 1439, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:52.305412", + "step": 1439, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009290986694395542, + "timestamp": "2025-09-10 02:18:52.336620", + "step": 1440, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:52.372568", + "step": 1440, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007599604316055775, + "timestamp": "2025-09-10 02:18:52.380911", + "step": 1441, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:52.415184", + "step": 1441, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008472729474306107, + "timestamp": "2025-09-10 02:18:52.421944", + "step": 1442, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:52.452994", + "step": 1442, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01178077794611454, + "timestamp": "2025-09-10 02:18:52.460256", + "step": 1443, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:52.492503", + "step": 1443, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02008945122361183, + "timestamp": "2025-09-10 02:18:52.520123", + "step": 1444, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:52.553628", + "step": 1444, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004093306139111519, + "timestamp": "2025-09-10 02:18:52.556553", + "step": 1445, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:52.589207", + "step": 1445, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007239846047013998, + "timestamp": "2025-09-10 02:18:52.595784", + "step": 1446, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:52.627497", + "step": 1446, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010106794536113739, + "timestamp": "2025-09-10 02:18:52.636740", + "step": 1447, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:18:52.675187", + "step": 1447, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013031134381890297, + "timestamp": "2025-09-10 02:18:52.711997", + "step": 1448, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:52.753937", + "step": 1448, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008832174353301525, + "timestamp": "2025-09-10 02:18:52.761978", + "step": 1449, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:52.794971", + "step": 1449, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021678507328033447, + "timestamp": "2025-09-10 02:18:52.804407", + "step": 1450, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:18:52.834882", + "step": 1450, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006038912571966648, + "timestamp": "2025-09-10 02:18:52.841917", + "step": 1451, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:18:52.872336", + "step": 1451, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015430964529514313, + "timestamp": "2025-09-10 02:18:52.903214", + "step": 1452, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:52.934017", + "step": 1452, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015627246350049973, + "timestamp": "2025-09-10 02:18:52.939110", + "step": 1453, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:52.969456", + "step": 1453, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01219885889440775, + "timestamp": "2025-09-10 02:18:52.982005", + "step": 1454, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:53.013431", + "step": 1454, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024581179022789, + "timestamp": "2025-09-10 02:18:53.024459", + "step": 1455, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:53.055390", + "step": 1455, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008804569952189922, + "timestamp": "2025-09-10 02:18:53.083745", + "step": 1456, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:53.115291", + "step": 1456, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013319587334990501, + "timestamp": "2025-09-10 02:18:53.123250", + "step": 1457, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:53.154585", + "step": 1457, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013631954789161682, + "timestamp": "2025-09-10 02:18:53.161717", + "step": 1458, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:18:53.194524", + "step": 1458, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012589896097779274, + "timestamp": "2025-09-10 02:18:53.200982", + "step": 1459, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:18:53.232868", + "step": 1459, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02879754640161991, + "timestamp": "2025-09-10 02:18:53.257640", + "step": 1460, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:53.288577", + "step": 1460, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008828964084386826, + "timestamp": "2025-09-10 02:18:53.293605", + "step": 1461, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:18:53.323952", + "step": 1461, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02157404087483883, + "timestamp": "2025-09-10 02:18:53.331730", + "step": 1462, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:18:53.362913", + "step": 1462, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02861526980996132, + "timestamp": "2025-09-10 02:18:53.373749", + "step": 1463, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:53.404535", + "step": 1463, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0109772440046072, + "timestamp": "2025-09-10 02:18:53.430088", + "step": 1464, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:18:53.460834", + "step": 1464, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02909570373594761, + "timestamp": "2025-09-10 02:18:53.463058", + "step": 1465, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:18:53.504235", + "step": 1465, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02886452153325081, + "timestamp": "2025-09-10 02:18:53.521575", + "step": 1466, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:18:53.560419", + "step": 1466, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013743521645665169, + "timestamp": "2025-09-10 02:18:53.576057", + "step": 1467, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:18:53.607609", + "step": 1467, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0125979483127594, + "timestamp": "2025-09-10 02:18:53.635898", + "step": 1468, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:18:53.667674", + "step": 1468, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02716805413365364, + "timestamp": "2025-09-10 02:18:53.677321", + "step": 1469, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:18:53.718446", + "step": 1469, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025139151141047478, + "timestamp": "2025-09-10 02:18:53.735533", + "step": 1470, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:19:03.879290", + "step": 1470, + "epoch": 1 + }, + { + "type": "pplx", + "content": 13626061.914788976, + "timestamp": "2025-09-10 02:19:03.882109", + "step": 1470, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:03.912866", + "step": 1470, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017740854993462563, + "timestamp": "2025-09-10 02:19:03.918795", + "step": 1471, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 13289167064320 + }, + "timestamp": "2025-09-10 02:19:03.958443", + "step": 1471, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021945033222436905, + "timestamp": "2025-09-10 02:19:03.995667", + "step": 1472, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:04.026813", + "step": 1472, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003880431642755866, + "timestamp": "2025-09-10 02:19:04.031328", + "step": 1473, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:04.061204", + "step": 1473, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025182703509926796, + "timestamp": "2025-09-10 02:19:04.071981", + "step": 1474, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:04.104926", + "step": 1474, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009050360880792141, + "timestamp": "2025-09-10 02:19:04.117506", + "step": 1475, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:04.148809", + "step": 1475, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020268557593226433, + "timestamp": "2025-09-10 02:19:04.181676", + "step": 1476, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:04.211493", + "step": 1476, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04309564083814621, + "timestamp": "2025-09-10 02:19:04.213697", + "step": 1477, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:04.243933", + "step": 1477, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0122428759932518, + "timestamp": "2025-09-10 02:19:04.256107", + "step": 1478, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:04.286555", + "step": 1478, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021157732233405113, + "timestamp": "2025-09-10 02:19:04.293319", + "step": 1479, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:04.323838", + "step": 1479, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00686487415805459, + "timestamp": "2025-09-10 02:19:04.356900", + "step": 1480, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:04.387730", + "step": 1480, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009640970267355442, + "timestamp": "2025-09-10 02:19:04.395982", + "step": 1481, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:04.427699", + "step": 1481, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0068480512127280235, + "timestamp": "2025-09-10 02:19:04.438232", + "step": 1482, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:04.469817", + "step": 1482, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002509176731109619, + "timestamp": "2025-09-10 02:19:04.482385", + "step": 1483, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:04.511920", + "step": 1483, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0024369838647544384, + "timestamp": "2025-09-10 02:19:04.539793", + "step": 1484, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:04.570613", + "step": 1484, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012398682534694672, + "timestamp": "2025-09-10 02:19:04.572544", + "step": 1485, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:04.603140", + "step": 1485, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011835220269858837, + "timestamp": "2025-09-10 02:19:04.607851", + "step": 1486, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:04.638014", + "step": 1486, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004558969754725695, + "timestamp": "2025-09-10 02:19:04.641892", + "step": 1487, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:04.672545", + "step": 1487, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01352360937744379, + "timestamp": "2025-09-10 02:19:04.697856", + "step": 1488, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:04.728706", + "step": 1488, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00836183037608862, + "timestamp": "2025-09-10 02:19:04.730897", + "step": 1489, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:19:04.765809", + "step": 1489, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0033379762899130583, + "timestamp": "2025-09-10 02:19:04.779844", + "step": 1490, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:19:04.811017", + "step": 1490, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006920557469129562, + "timestamp": "2025-09-10 02:19:04.813361", + "step": 1491, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:04.844166", + "step": 1491, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018455183133482933, + "timestamp": "2025-09-10 02:19:04.872005", + "step": 1492, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:04.903125", + "step": 1492, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020488440990447998, + "timestamp": "2025-09-10 02:19:04.911072", + "step": 1493, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:04.941252", + "step": 1493, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017999647185206413, + "timestamp": "2025-09-10 02:19:04.949128", + "step": 1494, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:04.979967", + "step": 1494, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02349008433520794, + "timestamp": "2025-09-10 02:19:04.983658", + "step": 1495, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:05.016393", + "step": 1495, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006956641562283039, + "timestamp": "2025-09-10 02:19:05.047311", + "step": 1496, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:05.077419", + "step": 1496, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012771239504218102, + "timestamp": "2025-09-10 02:19:05.085768", + "step": 1497, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:05.116904", + "step": 1497, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028537657111883163, + "timestamp": "2025-09-10 02:19:05.124157", + "step": 1498, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:05.153637", + "step": 1498, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0020859253127127886, + "timestamp": "2025-09-10 02:19:05.156458", + "step": 1499, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:05.186926", + "step": 1499, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019206488505005836, + "timestamp": "2025-09-10 02:19:05.212161", + "step": 1500, + "epoch": 1 + }, + { + "type": "info", + "content": "Checkpoint saved at step 1500", + "timestamp": "2025-09-10 02:19:09.902482", + "step": 1500, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:09.935075", + "step": 1500, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010249263606965542, + "timestamp": "2025-09-10 02:19:09.938233", + "step": 1501, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:09.970383", + "step": 1501, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024660227820277214, + "timestamp": "2025-09-10 02:19:09.979634", + "step": 1502, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:10.014194", + "step": 1502, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013713826425373554, + "timestamp": "2025-09-10 02:19:10.021249", + "step": 1503, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:19:10.063827", + "step": 1503, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006658419966697693, + "timestamp": "2025-09-10 02:19:10.102108", + "step": 1504, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:10.133383", + "step": 1504, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01818818412721157, + "timestamp": "2025-09-10 02:19:10.137431", + "step": 1505, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:10.169156", + "step": 1505, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023030122742056847, + "timestamp": "2025-09-10 02:19:10.176375", + "step": 1506, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:10.208117", + "step": 1506, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018940243870019913, + "timestamp": "2025-09-10 02:19:10.212098", + "step": 1507, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:19:10.250410", + "step": 1507, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006142920348793268, + "timestamp": "2025-09-10 02:19:10.287451", + "step": 1508, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:10.319677", + "step": 1508, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009842773899435997, + "timestamp": "2025-09-10 02:19:10.327667", + "step": 1509, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:10.357940", + "step": 1509, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018183773383498192, + "timestamp": "2025-09-10 02:19:10.365543", + "step": 1510, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:10.395927", + "step": 1510, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007604293525218964, + "timestamp": "2025-09-10 02:19:10.402947", + "step": 1511, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:10.433895", + "step": 1511, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008296381682157516, + "timestamp": "2025-09-10 02:19:10.461473", + "step": 1512, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:10.491791", + "step": 1512, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004962913691997528, + "timestamp": "2025-09-10 02:19:10.496950", + "step": 1513, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:10.528647", + "step": 1513, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006845235824584961, + "timestamp": "2025-09-10 02:19:10.536012", + "step": 1514, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:10.566685", + "step": 1514, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00035365772782824934, + "timestamp": "2025-09-10 02:19:10.569706", + "step": 1515, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:10.600963", + "step": 1515, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017009198665618896, + "timestamp": "2025-09-10 02:19:10.633604", + "step": 1516, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:10.665336", + "step": 1516, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011472431942820549, + "timestamp": "2025-09-10 02:19:10.669734", + "step": 1517, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:10.700629", + "step": 1517, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018043555319309235, + "timestamp": "2025-09-10 02:19:10.710367", + "step": 1518, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:10.741714", + "step": 1518, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012901760637760162, + "timestamp": "2025-09-10 02:19:10.754161", + "step": 1519, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:10.786107", + "step": 1519, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003726641181856394, + "timestamp": "2025-09-10 02:19:10.813635", + "step": 1520, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:10.844031", + "step": 1520, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004837970249354839, + "timestamp": "2025-09-10 02:19:10.848890", + "step": 1521, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:10.880087", + "step": 1521, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027267929166555405, + "timestamp": "2025-09-10 02:19:10.883803", + "step": 1522, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:10.914418", + "step": 1522, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02369379624724388, + "timestamp": "2025-09-10 02:19:10.921907", + "step": 1523, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:10.957545", + "step": 1523, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018621442141011357, + "timestamp": "2025-09-10 02:19:10.985654", + "step": 1524, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:11.017915", + "step": 1524, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01912684179842472, + "timestamp": "2025-09-10 02:19:11.022677", + "step": 1525, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:19:11.056726", + "step": 1525, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003869327250868082, + "timestamp": "2025-09-10 02:19:11.059031", + "step": 1526, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:19:11.097504", + "step": 1526, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0017400800716131926, + "timestamp": "2025-09-10 02:19:11.113340", + "step": 1527, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:11.145937", + "step": 1527, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0031910340767353773, + "timestamp": "2025-09-10 02:19:11.178488", + "step": 1528, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:11.209726", + "step": 1528, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00998393353074789, + "timestamp": "2025-09-10 02:19:11.214348", + "step": 1529, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:11.245050", + "step": 1529, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007462826557457447, + "timestamp": "2025-09-10 02:19:11.252114", + "step": 1530, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:11.282679", + "step": 1530, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009829898364841938, + "timestamp": "2025-09-10 02:19:11.293519", + "step": 1531, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:11.324112", + "step": 1531, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011728269048035145, + "timestamp": "2025-09-10 02:19:11.352538", + "step": 1532, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:19:11.391942", + "step": 1532, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009979200549423695, + "timestamp": "2025-09-10 02:19:11.408936", + "step": 1533, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:11.440144", + "step": 1533, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009194576181471348, + "timestamp": "2025-09-10 02:19:11.450196", + "step": 1534, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:11.482122", + "step": 1534, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038809494581073523, + "timestamp": "2025-09-10 02:19:11.485890", + "step": 1535, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:11.516810", + "step": 1535, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04445614293217659, + "timestamp": "2025-09-10 02:19:11.544585", + "step": 1536, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:11.575947", + "step": 1536, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0013619901146739721, + "timestamp": "2025-09-10 02:19:11.578278", + "step": 1537, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:11.609705", + "step": 1537, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008444820530712605, + "timestamp": "2025-09-10 02:19:11.613684", + "step": 1538, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:11.645949", + "step": 1538, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010615772567689419, + "timestamp": "2025-09-10 02:19:11.655699", + "step": 1539, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:11.686968", + "step": 1539, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018414005637168884, + "timestamp": "2025-09-10 02:19:11.715405", + "step": 1540, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:11.746396", + "step": 1540, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016833599656820297, + "timestamp": "2025-09-10 02:19:11.748965", + "step": 1541, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:11.780055", + "step": 1541, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021267401054501534, + "timestamp": "2025-09-10 02:19:11.790787", + "step": 1542, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:11.820770", + "step": 1542, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0016013866988942027, + "timestamp": "2025-09-10 02:19:11.827864", + "step": 1543, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:11.859009", + "step": 1543, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002547146985307336, + "timestamp": "2025-09-10 02:19:11.889785", + "step": 1544, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:19:11.923472", + "step": 1544, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0054414160549640656, + "timestamp": "2025-09-10 02:19:11.936361", + "step": 1545, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:11.967739", + "step": 1545, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003947163466364145, + "timestamp": "2025-09-10 02:19:11.974295", + "step": 1546, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:12.005187", + "step": 1546, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011945655569434166, + "timestamp": "2025-09-10 02:19:12.009348", + "step": 1547, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:12.043937", + "step": 1547, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024208705872297287, + "timestamp": "2025-09-10 02:19:12.074259", + "step": 1548, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:12.106578", + "step": 1548, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0040494343265891075, + "timestamp": "2025-09-10 02:19:12.111391", + "step": 1549, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:12.143234", + "step": 1549, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0034531753044575453, + "timestamp": "2025-09-10 02:19:12.147564", + "step": 1550, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:12.178742", + "step": 1550, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006650130730122328, + "timestamp": "2025-09-10 02:19:12.189561", + "step": 1551, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:12.220244", + "step": 1551, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015004181303083897, + "timestamp": "2025-09-10 02:19:12.245449", + "step": 1552, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:12.276928", + "step": 1552, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04160107299685478, + "timestamp": "2025-09-10 02:19:12.282194", + "step": 1553, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:12.317576", + "step": 1553, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0034997588954865932, + "timestamp": "2025-09-10 02:19:12.324532", + "step": 1554, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:12.361708", + "step": 1554, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032703883945941925, + "timestamp": "2025-09-10 02:19:12.365640", + "step": 1555, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:12.402175", + "step": 1555, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0007587299915030599, + "timestamp": "2025-09-10 02:19:12.435646", + "step": 1556, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:12.472372", + "step": 1556, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01702108420431614, + "timestamp": "2025-09-10 02:19:12.477555", + "step": 1557, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:12.512655", + "step": 1557, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03824358060956001, + "timestamp": "2025-09-10 02:19:12.520053", + "step": 1558, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:12.554768", + "step": 1558, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04590751603245735, + "timestamp": "2025-09-10 02:19:12.561526", + "step": 1559, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:12.593528", + "step": 1559, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0073296381160616875, + "timestamp": "2025-09-10 02:19:12.621090", + "step": 1560, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:12.656359", + "step": 1560, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00989292562007904, + "timestamp": "2025-09-10 02:19:12.665182", + "step": 1561, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:12.698238", + "step": 1561, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004150025546550751, + "timestamp": "2025-09-10 02:19:12.710416", + "step": 1562, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:12.743156", + "step": 1562, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005096559878438711, + "timestamp": "2025-09-10 02:19:12.750017", + "step": 1563, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:12.783012", + "step": 1563, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010926149785518646, + "timestamp": "2025-09-10 02:19:12.808126", + "step": 1564, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:12.838762", + "step": 1564, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0010295318206772208, + "timestamp": "2025-09-10 02:19:12.840914", + "step": 1565, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:12.871398", + "step": 1565, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03598492965102196, + "timestamp": "2025-09-10 02:19:12.878101", + "step": 1566, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:12.908645", + "step": 1566, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020379869267344475, + "timestamp": "2025-09-10 02:19:12.919096", + "step": 1567, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:12.950808", + "step": 1567, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014766373671591282, + "timestamp": "2025-09-10 02:19:12.979285", + "step": 1568, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:19:13.013184", + "step": 1568, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004263672977685928, + "timestamp": "2025-09-10 02:19:13.025928", + "step": 1569, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:13.059028", + "step": 1569, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003036454552784562, + "timestamp": "2025-09-10 02:19:13.061593", + "step": 1570, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:13.092133", + "step": 1570, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02348274551331997, + "timestamp": "2025-09-10 02:19:13.099943", + "step": 1571, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:19:13.133967", + "step": 1571, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004423712845891714, + "timestamp": "2025-09-10 02:19:13.168482", + "step": 1572, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:13.201480", + "step": 1572, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015006057918071747, + "timestamp": "2025-09-10 02:19:13.210174", + "step": 1573, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:13.241404", + "step": 1573, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01116950623691082, + "timestamp": "2025-09-10 02:19:13.245612", + "step": 1574, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:13.277344", + "step": 1574, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006449028849601746, + "timestamp": "2025-09-10 02:19:13.281195", + "step": 1575, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:13.312545", + "step": 1575, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013472805730998516, + "timestamp": "2025-09-10 02:19:13.340182", + "step": 1576, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:13.371388", + "step": 1576, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.043971676379442215, + "timestamp": "2025-09-10 02:19:13.373639", + "step": 1577, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:13.405054", + "step": 1577, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0046881563030183315, + "timestamp": "2025-09-10 02:19:13.411863", + "step": 1578, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:19:13.442366", + "step": 1578, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0024195548612624407, + "timestamp": "2025-09-10 02:19:13.444544", + "step": 1579, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:13.475562", + "step": 1579, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01876218058168888, + "timestamp": "2025-09-10 02:19:13.503355", + "step": 1580, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:13.535008", + "step": 1580, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012913152575492859, + "timestamp": "2025-09-10 02:19:13.540082", + "step": 1581, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:13.570603", + "step": 1581, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0017830540891736746, + "timestamp": "2025-09-10 02:19:13.577270", + "step": 1582, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:13.610802", + "step": 1582, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05414802208542824, + "timestamp": "2025-09-10 02:19:13.622355", + "step": 1583, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:13.653529", + "step": 1583, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01702873595058918, + "timestamp": "2025-09-10 02:19:13.681039", + "step": 1584, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:13.712174", + "step": 1584, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02116026170551777, + "timestamp": "2025-09-10 02:19:13.719993", + "step": 1585, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:13.751245", + "step": 1585, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006673253607004881, + "timestamp": "2025-09-10 02:19:13.763423", + "step": 1586, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:13.795855", + "step": 1586, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004170980304479599, + "timestamp": "2025-09-10 02:19:13.802709", + "step": 1587, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:13.835247", + "step": 1587, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013706117868423462, + "timestamp": "2025-09-10 02:19:13.865659", + "step": 1588, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:13.897377", + "step": 1588, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0012754879426211119, + "timestamp": "2025-09-10 02:19:13.902337", + "step": 1589, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:13.934244", + "step": 1589, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002866287948563695, + "timestamp": "2025-09-10 02:19:13.945068", + "step": 1590, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:13.975845", + "step": 1590, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007324092090129852, + "timestamp": "2025-09-10 02:19:13.985907", + "step": 1591, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:14.017461", + "step": 1591, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02281094528734684, + "timestamp": "2025-09-10 02:19:14.045784", + "step": 1592, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:19:14.077329", + "step": 1592, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005747564602643251, + "timestamp": "2025-09-10 02:19:14.079549", + "step": 1593, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:14.110416", + "step": 1593, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00610779132694006, + "timestamp": "2025-09-10 02:19:14.122081", + "step": 1594, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:14.153333", + "step": 1594, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02713647671043873, + "timestamp": "2025-09-10 02:19:14.160177", + "step": 1595, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:14.190485", + "step": 1595, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023267099633812904, + "timestamp": "2025-09-10 02:19:14.214370", + "step": 1596, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:14.245070", + "step": 1596, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0023487545549869537, + "timestamp": "2025-09-10 02:19:14.252377", + "step": 1597, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:14.284544", + "step": 1597, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0011892582988366485, + "timestamp": "2025-09-10 02:19:14.294391", + "step": 1598, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:14.325831", + "step": 1598, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0046529993414878845, + "timestamp": "2025-09-10 02:19:14.333072", + "step": 1599, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:19:14.367251", + "step": 1599, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0007145693525671959, + "timestamp": "2025-09-10 02:19:14.401520", + "step": 1600, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:14.434584", + "step": 1600, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030808603391051292, + "timestamp": "2025-09-10 02:19:14.436496", + "step": 1601, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:14.468690", + "step": 1601, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029920728877186775, + "timestamp": "2025-09-10 02:19:14.480403", + "step": 1602, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:14.511771", + "step": 1602, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010802625678479671, + "timestamp": "2025-09-10 02:19:14.519362", + "step": 1603, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:14.550633", + "step": 1603, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010367213748395443, + "timestamp": "2025-09-10 02:19:14.583769", + "step": 1604, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:14.614780", + "step": 1604, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004078761674463749, + "timestamp": "2025-09-10 02:19:14.619101", + "step": 1605, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:14.650662", + "step": 1605, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01048226747661829, + "timestamp": "2025-09-10 02:19:14.657463", + "step": 1606, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:14.689199", + "step": 1606, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020741861313581467, + "timestamp": "2025-09-10 02:19:14.696470", + "step": 1607, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:14.727985", + "step": 1607, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02420172281563282, + "timestamp": "2025-09-10 02:19:14.760321", + "step": 1608, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:14.791127", + "step": 1608, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011232390999794006, + "timestamp": "2025-09-10 02:19:14.793190", + "step": 1609, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:14.824541", + "step": 1609, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025039060041308403, + "timestamp": "2025-09-10 02:19:14.831385", + "step": 1610, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:14.862592", + "step": 1610, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010274732485413551, + "timestamp": "2025-09-10 02:19:14.869832", + "step": 1611, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:14.901580", + "step": 1611, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007137875538319349, + "timestamp": "2025-09-10 02:19:14.929641", + "step": 1612, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:19:14.961939", + "step": 1612, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006099893245846033, + "timestamp": "2025-09-10 02:19:14.974929", + "step": 1613, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:19:15.013784", + "step": 1613, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013428665697574615, + "timestamp": "2025-09-10 02:19:15.029699", + "step": 1614, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:15.061526", + "step": 1614, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02884194441139698, + "timestamp": "2025-09-10 02:19:15.068318", + "step": 1615, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:15.099983", + "step": 1615, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03412342816591263, + "timestamp": "2025-09-10 02:19:15.130601", + "step": 1616, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:15.162345", + "step": 1616, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016644364222884178, + "timestamp": "2025-09-10 02:19:15.166419", + "step": 1617, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:19:25.242216", + "step": 1617, + "epoch": 1 + }, + { + "type": "pplx", + "content": 14254475.265608242, + "timestamp": "2025-09-10 02:19:25.257155", + "step": 1617, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:25.296379", + "step": 1617, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0030304626561701298, + "timestamp": "2025-09-10 02:19:25.299826", + "step": 1618, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:25.332126", + "step": 1618, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0015295592602342367, + "timestamp": "2025-09-10 02:19:25.343829", + "step": 1619, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:25.375724", + "step": 1619, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010093179531395435, + "timestamp": "2025-09-10 02:19:25.403746", + "step": 1620, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:25.442447", + "step": 1620, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04103449359536171, + "timestamp": "2025-09-10 02:19:25.447594", + "step": 1621, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:25.480406", + "step": 1621, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030527640134096146, + "timestamp": "2025-09-10 02:19:25.492574", + "step": 1622, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:25.523519", + "step": 1622, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03347934037446976, + "timestamp": "2025-09-10 02:19:25.531231", + "step": 1623, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:25.561588", + "step": 1623, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005834028124809265, + "timestamp": "2025-09-10 02:19:25.590191", + "step": 1624, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:25.623761", + "step": 1624, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011887645348906517, + "timestamp": "2025-09-10 02:19:25.627883", + "step": 1625, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:25.660861", + "step": 1625, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020918427035212517, + "timestamp": "2025-09-10 02:19:25.671539", + "step": 1626, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:25.707325", + "step": 1626, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004566980060189962, + "timestamp": "2025-09-10 02:19:25.713071", + "step": 1627, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:25.744771", + "step": 1627, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01930670067667961, + "timestamp": "2025-09-10 02:19:25.769696", + "step": 1628, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:25.800237", + "step": 1628, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01942528784275055, + "timestamp": "2025-09-10 02:19:25.804804", + "step": 1629, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:25.835511", + "step": 1629, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012651464901864529, + "timestamp": "2025-09-10 02:19:25.845581", + "step": 1630, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:25.885732", + "step": 1630, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028396448120474815, + "timestamp": "2025-09-10 02:19:25.889920", + "step": 1631, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:25.924490", + "step": 1631, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010911746881902218, + "timestamp": "2025-09-10 02:19:25.955142", + "step": 1632, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:25.986806", + "step": 1632, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031062575057148933, + "timestamp": "2025-09-10 02:19:25.991159", + "step": 1633, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:26.025471", + "step": 1633, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010253122076392174, + "timestamp": "2025-09-10 02:19:26.034930", + "step": 1634, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:26.067458", + "step": 1634, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015442321076989174, + "timestamp": "2025-09-10 02:19:26.071192", + "step": 1635, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:26.103396", + "step": 1635, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004601712804287672, + "timestamp": "2025-09-10 02:19:26.128461", + "step": 1636, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:26.169924", + "step": 1636, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006141430698335171, + "timestamp": "2025-09-10 02:19:26.174411", + "step": 1637, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:26.211983", + "step": 1637, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004029339645057917, + "timestamp": "2025-09-10 02:19:26.215841", + "step": 1638, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:26.247780", + "step": 1638, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010412991046905518, + "timestamp": "2025-09-10 02:19:26.252030", + "step": 1639, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:26.283809", + "step": 1639, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013454841449856758, + "timestamp": "2025-09-10 02:19:26.312179", + "step": 1640, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:26.344145", + "step": 1640, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02152983471751213, + "timestamp": "2025-09-10 02:19:26.351604", + "step": 1641, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:26.392487", + "step": 1641, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006329267751425505, + "timestamp": "2025-09-10 02:19:26.399247", + "step": 1642, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:26.437367", + "step": 1642, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01075258944183588, + "timestamp": "2025-09-10 02:19:26.449101", + "step": 1643, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:26.487476", + "step": 1643, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0039253514260053635, + "timestamp": "2025-09-10 02:19:26.515046", + "step": 1644, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:26.556068", + "step": 1644, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012616248801350594, + "timestamp": "2025-09-10 02:19:26.560104", + "step": 1645, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:26.597052", + "step": 1645, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019528865814208984, + "timestamp": "2025-09-10 02:19:26.603995", + "step": 1646, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:26.638664", + "step": 1646, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0033076356630772352, + "timestamp": "2025-09-10 02:19:26.646191", + "step": 1647, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:19:26.681923", + "step": 1647, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01205162238329649, + "timestamp": "2025-09-10 02:19:26.716520", + "step": 1648, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:26.751236", + "step": 1648, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020559037104249, + "timestamp": "2025-09-10 02:19:26.753290", + "step": 1649, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:26.784471", + "step": 1649, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013713809661567211, + "timestamp": "2025-09-10 02:19:26.792067", + "step": 1650, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:26.824240", + "step": 1650, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026908008381724358, + "timestamp": "2025-09-10 02:19:26.831139", + "step": 1651, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:26.864818", + "step": 1651, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019599396735429764, + "timestamp": "2025-09-10 02:19:26.891705", + "step": 1652, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:19:26.928007", + "step": 1652, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02248522825539112, + "timestamp": "2025-09-10 02:19:26.943137", + "step": 1653, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:26.976637", + "step": 1653, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0142592191696167, + "timestamp": "2025-09-10 02:19:26.982632", + "step": 1654, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:27.016068", + "step": 1654, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020718032494187355, + "timestamp": "2025-09-10 02:19:27.022551", + "step": 1655, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:27.054261", + "step": 1655, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05304405093193054, + "timestamp": "2025-09-10 02:19:27.081700", + "step": 1656, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:19:27.118254", + "step": 1656, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.040047433227300644, + "timestamp": "2025-09-10 02:19:27.133381", + "step": 1657, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:19:27.174828", + "step": 1657, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02052774466574192, + "timestamp": "2025-09-10 02:19:27.191899", + "step": 1658, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:27.225404", + "step": 1658, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013448750600218773, + "timestamp": "2025-09-10 02:19:27.231262", + "step": 1659, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:27.262310", + "step": 1659, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005621184129267931, + "timestamp": "2025-09-10 02:19:27.289514", + "step": 1660, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:27.320820", + "step": 1660, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020898720249533653, + "timestamp": "2025-09-10 02:19:27.330737", + "step": 1661, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:27.362638", + "step": 1661, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017461569979786873, + "timestamp": "2025-09-10 02:19:27.375213", + "step": 1662, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 14712978242368 + }, + "timestamp": "2025-09-10 02:19:27.418839", + "step": 1662, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023927049711346626, + "timestamp": "2025-09-10 02:19:27.436385", + "step": 1663, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:27.472070", + "step": 1663, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011457578279078007, + "timestamp": "2025-09-10 02:19:27.505457", + "step": 1664, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:27.540203", + "step": 1664, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0047464510425925255, + "timestamp": "2025-09-10 02:19:27.544670", + "step": 1665, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:27.577596", + "step": 1665, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03707936406135559, + "timestamp": "2025-09-10 02:19:27.582577", + "step": 1666, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:27.614332", + "step": 1666, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.042561326175928116, + "timestamp": "2025-09-10 02:19:27.617702", + "step": 1667, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:27.650523", + "step": 1667, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005984437186270952, + "timestamp": "2025-09-10 02:19:27.683287", + "step": 1668, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:27.714055", + "step": 1668, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011304566636681557, + "timestamp": "2025-09-10 02:19:27.722639", + "step": 1669, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:27.754009", + "step": 1669, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0555756576359272, + "timestamp": "2025-09-10 02:19:27.761052", + "step": 1670, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:27.791616", + "step": 1670, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014852997846901417, + "timestamp": "2025-09-10 02:19:27.795734", + "step": 1671, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:27.826214", + "step": 1671, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.000465600925963372, + "timestamp": "2025-09-10 02:19:27.853922", + "step": 1672, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:27.884679", + "step": 1672, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011479363776743412, + "timestamp": "2025-09-10 02:19:27.889997", + "step": 1673, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:27.921029", + "step": 1673, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00886636320501566, + "timestamp": "2025-09-10 02:19:27.931296", + "step": 1674, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:27.962835", + "step": 1674, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.033997684717178345, + "timestamp": "2025-09-10 02:19:27.969980", + "step": 1675, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:28.011996", + "step": 1675, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028899533674120903, + "timestamp": "2025-09-10 02:19:28.044941", + "step": 1676, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:28.075988", + "step": 1676, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015834344550967216, + "timestamp": "2025-09-10 02:19:28.080818", + "step": 1677, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:28.126402", + "step": 1677, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0010925616370514035, + "timestamp": "2025-09-10 02:19:28.133192", + "step": 1678, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:28.165787", + "step": 1678, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011072367429733276, + "timestamp": "2025-09-10 02:19:28.176200", + "step": 1679, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:19:28.215114", + "step": 1679, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014479962177574635, + "timestamp": "2025-09-10 02:19:28.252184", + "step": 1680, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:28.283422", + "step": 1680, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005413788836449385, + "timestamp": "2025-09-10 02:19:28.288347", + "step": 1681, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:19:28.323315", + "step": 1681, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007339499890804291, + "timestamp": "2025-09-10 02:19:28.337248", + "step": 1682, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:28.368533", + "step": 1682, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010168512351810932, + "timestamp": "2025-09-10 02:19:28.373074", + "step": 1683, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:19:28.408455", + "step": 1683, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007975684478878975, + "timestamp": "2025-09-10 02:19:28.443073", + "step": 1684, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:28.473739", + "step": 1684, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015236958861351013, + "timestamp": "2025-09-10 02:19:28.478172", + "step": 1685, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:28.508675", + "step": 1685, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010547908022999763, + "timestamp": "2025-09-10 02:19:28.516263", + "step": 1686, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:28.547696", + "step": 1686, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0036612115800380707, + "timestamp": "2025-09-10 02:19:28.554535", + "step": 1687, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:19:28.587864", + "step": 1687, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02034112438559532, + "timestamp": "2025-09-10 02:19:28.622149", + "step": 1688, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:28.657085", + "step": 1688, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012231721542775631, + "timestamp": "2025-09-10 02:19:28.667076", + "step": 1689, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:28.701677", + "step": 1689, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006200658623129129, + "timestamp": "2025-09-10 02:19:28.710829", + "step": 1690, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:28.741882", + "step": 1690, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011389417573809624, + "timestamp": "2025-09-10 02:19:28.746123", + "step": 1691, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:28.777247", + "step": 1691, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028033211827278137, + "timestamp": "2025-09-10 02:19:28.805545", + "step": 1692, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:28.837135", + "step": 1692, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007890121079981327, + "timestamp": "2025-09-10 02:19:28.841907", + "step": 1693, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:28.872512", + "step": 1693, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03063378855586052, + "timestamp": "2025-09-10 02:19:28.875117", + "step": 1694, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:28.905933", + "step": 1694, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0022948638070374727, + "timestamp": "2025-09-10 02:19:28.916771", + "step": 1695, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:28.947133", + "step": 1695, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002465051133185625, + "timestamp": "2025-09-10 02:19:28.978206", + "step": 1696, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:29.008980", + "step": 1696, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009558682329952717, + "timestamp": "2025-09-10 02:19:29.018783", + "step": 1697, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:29.049904", + "step": 1697, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016664791852235794, + "timestamp": "2025-09-10 02:19:29.057524", + "step": 1698, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:19:29.091977", + "step": 1698, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013536560349166393, + "timestamp": "2025-09-10 02:19:29.105628", + "step": 1699, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:19:29.144132", + "step": 1699, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008019420318305492, + "timestamp": "2025-09-10 02:19:29.180893", + "step": 1700, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:29.210938", + "step": 1700, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010356190614402294, + "timestamp": "2025-09-10 02:19:29.215979", + "step": 1701, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:29.256203", + "step": 1701, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016179528087377548, + "timestamp": "2025-09-10 02:19:29.260046", + "step": 1702, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:19:29.293405", + "step": 1702, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018332941457629204, + "timestamp": "2025-09-10 02:19:29.306772", + "step": 1703, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:29.338414", + "step": 1703, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02060030773282051, + "timestamp": "2025-09-10 02:19:29.369647", + "step": 1704, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:29.403513", + "step": 1704, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0144452890381217, + "timestamp": "2025-09-10 02:19:29.411462", + "step": 1705, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:29.442717", + "step": 1705, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016921203583478928, + "timestamp": "2025-09-10 02:19:29.449999", + "step": 1706, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:29.480120", + "step": 1706, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03076860122382641, + "timestamp": "2025-09-10 02:19:29.486961", + "step": 1707, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:29.518046", + "step": 1707, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010674857534468174, + "timestamp": "2025-09-10 02:19:29.551498", + "step": 1708, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:29.582268", + "step": 1708, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004811963532119989, + "timestamp": "2025-09-10 02:19:29.587283", + "step": 1709, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:29.617891", + "step": 1709, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02059813216328621, + "timestamp": "2025-09-10 02:19:29.628103", + "step": 1710, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:29.661190", + "step": 1710, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04477255791425705, + "timestamp": "2025-09-10 02:19:29.668702", + "step": 1711, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:29.699404", + "step": 1711, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022556250914931297, + "timestamp": "2025-09-10 02:19:29.727279", + "step": 1712, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:19:29.760289", + "step": 1712, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014114036224782467, + "timestamp": "2025-09-10 02:19:29.773276", + "step": 1713, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:29.803259", + "step": 1713, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02027253620326519, + "timestamp": "2025-09-10 02:19:29.810376", + "step": 1714, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:29.840408", + "step": 1714, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004980639088898897, + "timestamp": "2025-09-10 02:19:29.844370", + "step": 1715, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:29.877268", + "step": 1715, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01874137483537197, + "timestamp": "2025-09-10 02:19:29.905832", + "step": 1716, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:29.936597", + "step": 1716, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00487111508846283, + "timestamp": "2025-09-10 02:19:29.938607", + "step": 1717, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 15662185694400 + }, + "timestamp": "2025-09-10 02:19:29.984718", + "step": 1717, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008420931175351143, + "timestamp": "2025-09-10 02:19:30.003909", + "step": 1718, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:30.034431", + "step": 1718, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00927684735506773, + "timestamp": "2025-09-10 02:19:30.041409", + "step": 1719, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:30.074103", + "step": 1719, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00768580287694931, + "timestamp": "2025-09-10 02:19:30.107605", + "step": 1720, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:19:30.137477", + "step": 1720, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01326842326670885, + "timestamp": "2025-09-10 02:19:30.139625", + "step": 1721, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:30.171634", + "step": 1721, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004389611072838306, + "timestamp": "2025-09-10 02:19:30.182588", + "step": 1722, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:30.213265", + "step": 1722, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020359130576252937, + "timestamp": "2025-09-10 02:19:30.217724", + "step": 1723, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:19:30.253023", + "step": 1723, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019980600103735924, + "timestamp": "2025-09-10 02:19:30.287976", + "step": 1724, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:30.319961", + "step": 1724, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012974241748452187, + "timestamp": "2025-09-10 02:19:30.327257", + "step": 1725, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:30.359348", + "step": 1725, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012521286495029926, + "timestamp": "2025-09-10 02:19:30.367171", + "step": 1726, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:19:30.401011", + "step": 1726, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01130104623734951, + "timestamp": "2025-09-10 02:19:30.414735", + "step": 1727, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:30.446730", + "step": 1727, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.032924991101026535, + "timestamp": "2025-09-10 02:19:30.475325", + "step": 1728, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:30.507565", + "step": 1728, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01869148574769497, + "timestamp": "2025-09-10 02:19:30.511832", + "step": 1729, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:30.542177", + "step": 1729, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003208654234185815, + "timestamp": "2025-09-10 02:19:30.544625", + "step": 1730, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:30.574883", + "step": 1730, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008543224073946476, + "timestamp": "2025-09-10 02:19:30.577567", + "step": 1731, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:30.608550", + "step": 1731, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009434954263269901, + "timestamp": "2025-09-10 02:19:30.636421", + "step": 1732, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:19:30.673056", + "step": 1732, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05541825294494629, + "timestamp": "2025-09-10 02:19:30.688495", + "step": 1733, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:30.719019", + "step": 1733, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0067622484639286995, + "timestamp": "2025-09-10 02:19:30.723483", + "step": 1734, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:30.754703", + "step": 1734, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015380342490971088, + "timestamp": "2025-09-10 02:19:30.762473", + "step": 1735, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:30.792914", + "step": 1735, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017134329304099083, + "timestamp": "2025-09-10 02:19:30.821094", + "step": 1736, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:30.851110", + "step": 1736, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03203447908163071, + "timestamp": "2025-09-10 02:19:30.853371", + "step": 1737, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:30.892062", + "step": 1737, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003379482077434659, + "timestamp": "2025-09-10 02:19:30.898984", + "step": 1738, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:19:30.939908", + "step": 1738, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011437847279012203, + "timestamp": "2025-09-10 02:19:30.953293", + "step": 1739, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:30.985412", + "step": 1739, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004656031262129545, + "timestamp": "2025-09-10 02:19:31.014134", + "step": 1740, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:31.045209", + "step": 1740, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003976056352257729, + "timestamp": "2025-09-10 02:19:31.050582", + "step": 1741, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:31.083172", + "step": 1741, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004918371327221394, + "timestamp": "2025-09-10 02:19:31.087586", + "step": 1742, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 18035204324480 + }, + "timestamp": "2025-09-10 02:19:31.141165", + "step": 1742, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026369964703917503, + "timestamp": "2025-09-10 02:19:31.162721", + "step": 1743, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:31.193380", + "step": 1743, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038020031061023474, + "timestamp": "2025-09-10 02:19:31.217175", + "step": 1744, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:31.247831", + "step": 1744, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0021418523974716663, + "timestamp": "2025-09-10 02:19:31.257520", + "step": 1745, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:31.288992", + "step": 1745, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00539687043055892, + "timestamp": "2025-09-10 02:19:31.292992", + "step": 1746, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:31.323652", + "step": 1746, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003791423747316003, + "timestamp": "2025-09-10 02:19:31.327975", + "step": 1747, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:31.358015", + "step": 1747, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005174871999770403, + "timestamp": "2025-09-10 02:19:31.381555", + "step": 1748, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:31.413188", + "step": 1748, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009157408960163593, + "timestamp": "2025-09-10 02:19:31.415329", + "step": 1749, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:31.445518", + "step": 1749, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012222186662256718, + "timestamp": "2025-09-10 02:19:31.448319", + "step": 1750, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 13289167064320 + }, + "timestamp": "2025-09-10 02:19:31.487613", + "step": 1750, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013511120341718197, + "timestamp": "2025-09-10 02:19:31.503936", + "step": 1751, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:19:31.537947", + "step": 1751, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018908429890871048, + "timestamp": "2025-09-10 02:19:31.572494", + "step": 1752, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:31.603343", + "step": 1752, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00666068447753787, + "timestamp": "2025-09-10 02:19:31.611216", + "step": 1753, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:31.642078", + "step": 1753, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012360441498458385, + "timestamp": "2025-09-10 02:19:31.652141", + "step": 1754, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:31.684148", + "step": 1754, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003066555829718709, + "timestamp": "2025-09-10 02:19:31.691911", + "step": 1755, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:31.722517", + "step": 1755, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02608044445514679, + "timestamp": "2025-09-10 02:19:31.750820", + "step": 1756, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:31.781433", + "step": 1756, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0021821721456944942, + "timestamp": "2025-09-10 02:19:31.786006", + "step": 1757, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:31.817734", + "step": 1757, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019431469962000847, + "timestamp": "2025-09-10 02:19:31.821666", + "step": 1758, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:31.851856", + "step": 1758, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008259564638137817, + "timestamp": "2025-09-10 02:19:31.856444", + "step": 1759, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:31.886455", + "step": 1759, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00771870044991374, + "timestamp": "2025-09-10 02:19:31.918190", + "step": 1760, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:31.949685", + "step": 1760, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007215961813926697, + "timestamp": "2025-09-10 02:19:31.952016", + "step": 1761, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:31.983238", + "step": 1761, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0035610010381788015, + "timestamp": "2025-09-10 02:19:31.995161", + "step": 1762, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:32.026241", + "step": 1762, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011534550227224827, + "timestamp": "2025-09-10 02:19:32.033825", + "step": 1763, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:32.065156", + "step": 1763, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009490884840488434, + "timestamp": "2025-09-10 02:19:32.096112", + "step": 1764, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:19:42.252845", + "step": 1764, + "epoch": 1 + }, + { + "type": "pplx", + "content": 13646644.047763163, + "timestamp": "2025-09-10 02:19:42.255566", + "step": 1764, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:42.285893", + "step": 1764, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009771243669092655, + "timestamp": "2025-09-10 02:19:42.288021", + "step": 1765, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:42.319947", + "step": 1765, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0313577726483345, + "timestamp": "2025-09-10 02:19:42.326528", + "step": 1766, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:42.358434", + "step": 1766, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014947721734642982, + "timestamp": "2025-09-10 02:19:42.368142", + "step": 1767, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:42.399916", + "step": 1767, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006268322933465242, + "timestamp": "2025-09-10 02:19:42.424984", + "step": 1768, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:42.457111", + "step": 1768, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0015659164637327194, + "timestamp": "2025-09-10 02:19:42.461865", + "step": 1769, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:42.493385", + "step": 1769, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023305343464016914, + "timestamp": "2025-09-10 02:19:42.500141", + "step": 1770, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:42.533254", + "step": 1770, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027237599715590477, + "timestamp": "2025-09-10 02:19:42.540707", + "step": 1771, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:42.572132", + "step": 1771, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0052889627404510975, + "timestamp": "2025-09-10 02:19:42.600096", + "step": 1772, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:42.633366", + "step": 1772, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008854638785123825, + "timestamp": "2025-09-10 02:19:42.642834", + "step": 1773, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:42.673736", + "step": 1773, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0037044784985482693, + "timestamp": "2025-09-10 02:19:42.680657", + "step": 1774, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:42.712060", + "step": 1774, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0077804699540138245, + "timestamp": "2025-09-10 02:19:42.719552", + "step": 1775, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:42.750994", + "step": 1775, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004913232754915953, + "timestamp": "2025-09-10 02:19:42.778581", + "step": 1776, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:19:42.811582", + "step": 1776, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.051165949553251266, + "timestamp": "2025-09-10 02:19:42.824296", + "step": 1777, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:42.856365", + "step": 1777, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004899430554360151, + "timestamp": "2025-09-10 02:19:42.867271", + "step": 1778, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:42.897553", + "step": 1778, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05997491627931595, + "timestamp": "2025-09-10 02:19:42.900109", + "step": 1779, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:42.931479", + "step": 1779, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.031111031770706177, + "timestamp": "2025-09-10 02:19:42.963258", + "step": 1780, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:19:42.996526", + "step": 1780, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01209025364369154, + "timestamp": "2025-09-10 02:19:43.009852", + "step": 1781, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:43.042551", + "step": 1781, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004403825383633375, + "timestamp": "2025-09-10 02:19:43.053498", + "step": 1782, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:43.085270", + "step": 1782, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022716468200087547, + "timestamp": "2025-09-10 02:19:43.089510", + "step": 1783, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:19:43.127488", + "step": 1783, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04864273592829704, + "timestamp": "2025-09-10 02:19:43.164051", + "step": 1784, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:43.195469", + "step": 1784, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015554594807326794, + "timestamp": "2025-09-10 02:19:43.203965", + "step": 1785, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:43.234101", + "step": 1785, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005889651831239462, + "timestamp": "2025-09-10 02:19:43.236636", + "step": 1786, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:43.267481", + "step": 1786, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009219110012054443, + "timestamp": "2025-09-10 02:19:43.274228", + "step": 1787, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:19:43.305143", + "step": 1787, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011448384262621403, + "timestamp": "2025-09-10 02:19:43.328394", + "step": 1788, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:43.358848", + "step": 1788, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006459720432758331, + "timestamp": "2025-09-10 02:19:43.363268", + "step": 1789, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:43.395200", + "step": 1789, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007149911485612392, + "timestamp": "2025-09-10 02:19:43.398808", + "step": 1790, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:19:43.437204", + "step": 1790, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008804053999483585, + "timestamp": "2025-09-10 02:19:43.452810", + "step": 1791, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:43.484768", + "step": 1791, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008087508380413055, + "timestamp": "2025-09-10 02:19:43.515258", + "step": 1792, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:19:43.549534", + "step": 1792, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02685811184346676, + "timestamp": "2025-09-10 02:19:43.562840", + "step": 1793, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:43.595065", + "step": 1793, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007293777074664831, + "timestamp": "2025-09-10 02:19:43.602126", + "step": 1794, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:43.635093", + "step": 1794, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011456483043730259, + "timestamp": "2025-09-10 02:19:43.639291", + "step": 1795, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:43.672132", + "step": 1795, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02691769227385521, + "timestamp": "2025-09-10 02:19:43.696135", + "step": 1796, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:43.727428", + "step": 1796, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0008614645921625197, + "timestamp": "2025-09-10 02:19:43.729755", + "step": 1797, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:43.761349", + "step": 1797, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01995791494846344, + "timestamp": "2025-09-10 02:19:43.768894", + "step": 1798, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:43.799930", + "step": 1798, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013489159755408764, + "timestamp": "2025-09-10 02:19:43.806777", + "step": 1799, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:43.837880", + "step": 1799, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014494777657091618, + "timestamp": "2025-09-10 02:19:43.870637", + "step": 1800, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:19:43.903188", + "step": 1800, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02528318762779236, + "timestamp": "2025-09-10 02:19:43.915834", + "step": 1801, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:19:43.949804", + "step": 1801, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0043422463349998, + "timestamp": "2025-09-10 02:19:43.963175", + "step": 1802, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:43.994251", + "step": 1802, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0019023737404495478, + "timestamp": "2025-09-10 02:19:43.996710", + "step": 1803, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:44.027467", + "step": 1803, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01238183956593275, + "timestamp": "2025-09-10 02:19:44.052681", + "step": 1804, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:44.083781", + "step": 1804, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01738804019987583, + "timestamp": "2025-09-10 02:19:44.086049", + "step": 1805, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:44.117517", + "step": 1805, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0023342971689999104, + "timestamp": "2025-09-10 02:19:44.125013", + "step": 1806, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:19:44.160189", + "step": 1806, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005149137694388628, + "timestamp": "2025-09-10 02:19:44.173597", + "step": 1807, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:44.204211", + "step": 1807, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0011267533991485834, + "timestamp": "2025-09-10 02:19:44.229782", + "step": 1808, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:44.261247", + "step": 1808, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022445213049650192, + "timestamp": "2025-09-10 02:19:44.269737", + "step": 1809, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:44.301470", + "step": 1809, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034575022757053375, + "timestamp": "2025-09-10 02:19:44.311176", + "step": 1810, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:44.342260", + "step": 1810, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002625245600938797, + "timestamp": "2025-09-10 02:19:44.344711", + "step": 1811, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:44.376014", + "step": 1811, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00602992856875062, + "timestamp": "2025-09-10 02:19:44.408890", + "step": 1812, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:44.440634", + "step": 1812, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003411894431337714, + "timestamp": "2025-09-10 02:19:44.444840", + "step": 1813, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:44.476205", + "step": 1813, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015310808084905148, + "timestamp": "2025-09-10 02:19:44.486556", + "step": 1814, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:44.517642", + "step": 1814, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005168873351067305, + "timestamp": "2025-09-10 02:19:44.529701", + "step": 1815, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:19:44.562139", + "step": 1815, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008812850341200829, + "timestamp": "2025-09-10 02:19:44.585695", + "step": 1816, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:19:44.618878", + "step": 1816, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014714348129928112, + "timestamp": "2025-09-10 02:19:44.631876", + "step": 1817, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:44.664299", + "step": 1817, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012252910062670708, + "timestamp": "2025-09-10 02:19:44.674674", + "step": 1818, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:44.705225", + "step": 1818, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03599643334746361, + "timestamp": "2025-09-10 02:19:44.712363", + "step": 1819, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:44.743233", + "step": 1819, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011372431181371212, + "timestamp": "2025-09-10 02:19:44.775022", + "step": 1820, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:44.806401", + "step": 1820, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002817036584019661, + "timestamp": "2025-09-10 02:19:44.810885", + "step": 1821, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:19:44.845983", + "step": 1821, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007789141498506069, + "timestamp": "2025-09-10 02:19:44.860026", + "step": 1822, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:44.892057", + "step": 1822, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02494768425822258, + "timestamp": "2025-09-10 02:19:44.898761", + "step": 1823, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:44.931244", + "step": 1823, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028473839163780212, + "timestamp": "2025-09-10 02:19:44.964020", + "step": 1824, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:44.996431", + "step": 1824, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028283346444368362, + "timestamp": "2025-09-10 02:19:45.000490", + "step": 1825, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:45.031640", + "step": 1825, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03897113725543022, + "timestamp": "2025-09-10 02:19:45.039246", + "step": 1826, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:45.071557", + "step": 1826, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018188832327723503, + "timestamp": "2025-09-10 02:19:45.078189", + "step": 1827, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:19:45.109791", + "step": 1827, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03504600375890732, + "timestamp": "2025-09-10 02:19:45.133580", + "step": 1828, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:45.165973", + "step": 1828, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01439552940428257, + "timestamp": "2025-09-10 02:19:45.170654", + "step": 1829, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:45.202156", + "step": 1829, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0034188800491392612, + "timestamp": "2025-09-10 02:19:45.212188", + "step": 1830, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:45.242754", + "step": 1830, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018745275447145104, + "timestamp": "2025-09-10 02:19:45.249485", + "step": 1831, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:45.281073", + "step": 1831, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01610477827489376, + "timestamp": "2025-09-10 02:19:45.306446", + "step": 1832, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:45.337756", + "step": 1832, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00925888679921627, + "timestamp": "2025-09-10 02:19:45.340062", + "step": 1833, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:45.372125", + "step": 1833, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019423970952630043, + "timestamp": "2025-09-10 02:19:45.379888", + "step": 1834, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:45.412255", + "step": 1834, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004022897686809301, + "timestamp": "2025-09-10 02:19:45.419864", + "step": 1835, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:45.451179", + "step": 1835, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05194368213415146, + "timestamp": "2025-09-10 02:19:45.482178", + "step": 1836, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:45.514061", + "step": 1836, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02164938487112522, + "timestamp": "2025-09-10 02:19:45.518937", + "step": 1837, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:45.549764", + "step": 1837, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006102901417762041, + "timestamp": "2025-09-10 02:19:45.561812", + "step": 1838, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:45.592699", + "step": 1838, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029408836737275124, + "timestamp": "2025-09-10 02:19:45.599380", + "step": 1839, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:45.631255", + "step": 1839, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002608294365927577, + "timestamp": "2025-09-10 02:19:45.663366", + "step": 1840, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:45.695272", + "step": 1840, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007881422527134418, + "timestamp": "2025-09-10 02:19:45.697541", + "step": 1841, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:45.728912", + "step": 1841, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0055758110247552395, + "timestamp": "2025-09-10 02:19:45.736321", + "step": 1842, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:45.767790", + "step": 1842, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010343975387513638, + "timestamp": "2025-09-10 02:19:45.775320", + "step": 1843, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:45.806451", + "step": 1843, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011013594456017017, + "timestamp": "2025-09-10 02:19:45.835059", + "step": 1844, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:45.866731", + "step": 1844, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014858272857964039, + "timestamp": "2025-09-10 02:19:45.871140", + "step": 1845, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:45.903377", + "step": 1845, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016053643077611923, + "timestamp": "2025-09-10 02:19:45.910764", + "step": 1846, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:45.943260", + "step": 1846, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0070062256418168545, + "timestamp": "2025-09-10 02:19:45.953665", + "step": 1847, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:19:45.989305", + "step": 1847, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005651051644235849, + "timestamp": "2025-09-10 02:19:46.023820", + "step": 1848, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:46.055949", + "step": 1848, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022787367925047874, + "timestamp": "2025-09-10 02:19:46.060181", + "step": 1849, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:46.090933", + "step": 1849, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016836825758218765, + "timestamp": "2025-09-10 02:19:46.098375", + "step": 1850, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:46.128908", + "step": 1850, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022327521815896034, + "timestamp": "2025-09-10 02:19:46.136274", + "step": 1851, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:19:46.174803", + "step": 1851, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016063082963228226, + "timestamp": "2025-09-10 02:19:46.211563", + "step": 1852, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 848 + ], + "flops": 25154260214720 + }, + "timestamp": "2025-09-10 02:19:46.280708", + "step": 1852, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0024673263542354107, + "timestamp": "2025-09-10 02:19:46.310219", + "step": 1853, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:46.346698", + "step": 1853, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004926327615976334, + "timestamp": "2025-09-10 02:19:46.359274", + "step": 1854, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:46.391250", + "step": 1854, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011958016082644463, + "timestamp": "2025-09-10 02:19:46.395459", + "step": 1855, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:46.427033", + "step": 1855, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007356296759098768, + "timestamp": "2025-09-10 02:19:46.452303", + "step": 1856, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:46.483238", + "step": 1856, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010786294937133789, + "timestamp": "2025-09-10 02:19:46.491004", + "step": 1857, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:46.522074", + "step": 1857, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013343775644898415, + "timestamp": "2025-09-10 02:19:46.528769", + "step": 1858, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:19:46.563654", + "step": 1858, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022976329550147057, + "timestamp": "2025-09-10 02:19:46.577360", + "step": 1859, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:19:46.612402", + "step": 1859, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023406516760587692, + "timestamp": "2025-09-10 02:19:46.647250", + "step": 1860, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:46.677341", + "step": 1860, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009630659595131874, + "timestamp": "2025-09-10 02:19:46.679555", + "step": 1861, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 15187581968384 + }, + "timestamp": "2025-09-10 02:19:46.721859", + "step": 1861, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010469197295606136, + "timestamp": "2025-09-10 02:19:46.739610", + "step": 1862, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:19:46.779453", + "step": 1862, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004274784587323666, + "timestamp": "2025-09-10 02:19:46.795659", + "step": 1863, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:46.826679", + "step": 1863, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020305419340729713, + "timestamp": "2025-09-10 02:19:46.851960", + "step": 1864, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:46.882972", + "step": 1864, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01911826804280281, + "timestamp": "2025-09-10 02:19:46.893468", + "step": 1865, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:46.924922", + "step": 1865, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006116253789514303, + "timestamp": "2025-09-10 02:19:46.932746", + "step": 1866, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:46.963830", + "step": 1866, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03426285460591316, + "timestamp": "2025-09-10 02:19:46.974691", + "step": 1867, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:47.005402", + "step": 1867, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01511828787624836, + "timestamp": "2025-09-10 02:19:47.037027", + "step": 1868, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:47.067742", + "step": 1868, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03478477522730827, + "timestamp": "2025-09-10 02:19:47.072398", + "step": 1869, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:19:47.105643", + "step": 1869, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0066609373316168785, + "timestamp": "2025-09-10 02:19:47.119012", + "step": 1870, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:47.149847", + "step": 1870, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009698964655399323, + "timestamp": "2025-09-10 02:19:47.156680", + "step": 1871, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:47.187143", + "step": 1871, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027642009779810905, + "timestamp": "2025-09-10 02:19:47.218357", + "step": 1872, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:47.248765", + "step": 1872, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006270275916904211, + "timestamp": "2025-09-10 02:19:47.253531", + "step": 1873, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:19:47.283393", + "step": 1873, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0051859780214726925, + "timestamp": "2025-09-10 02:19:47.285402", + "step": 1874, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:19:47.323399", + "step": 1874, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013219809159636497, + "timestamp": "2025-09-10 02:19:47.338950", + "step": 1875, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:47.369922", + "step": 1875, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00313380965963006, + "timestamp": "2025-09-10 02:19:47.395333", + "step": 1876, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:47.426344", + "step": 1876, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017517106607556343, + "timestamp": "2025-09-10 02:19:47.436727", + "step": 1877, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:47.468272", + "step": 1877, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04642176628112793, + "timestamp": "2025-09-10 02:19:47.478627", + "step": 1878, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:47.509259", + "step": 1878, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015917208045721054, + "timestamp": "2025-09-10 02:19:47.516252", + "step": 1879, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:19:47.553668", + "step": 1879, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01571694202721119, + "timestamp": "2025-09-10 02:19:47.590183", + "step": 1880, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:47.621751", + "step": 1880, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007821121253073215, + "timestamp": "2025-09-10 02:19:47.626878", + "step": 1881, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:47.658608", + "step": 1881, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024149566888809204, + "timestamp": "2025-09-10 02:19:47.666057", + "step": 1882, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:19:47.702178", + "step": 1882, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00780960638076067, + "timestamp": "2025-09-10 02:19:47.715829", + "step": 1883, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:47.747249", + "step": 1883, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009634777903556824, + "timestamp": "2025-09-10 02:19:47.775606", + "step": 1884, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:47.806891", + "step": 1884, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004729505628347397, + "timestamp": "2025-09-10 02:19:47.811470", + "step": 1885, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:47.843086", + "step": 1885, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028273126110434532, + "timestamp": "2025-09-10 02:19:47.855626", + "step": 1886, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:47.887268", + "step": 1886, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0056550041772425175, + "timestamp": "2025-09-10 02:19:47.894317", + "step": 1887, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:47.925933", + "step": 1887, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0050589581951498985, + "timestamp": "2025-09-10 02:19:47.954221", + "step": 1888, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:47.984999", + "step": 1888, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01905803009867668, + "timestamp": "2025-09-10 02:19:47.990440", + "step": 1889, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:48.023540", + "step": 1889, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011093123815953732, + "timestamp": "2025-09-10 02:19:48.030626", + "step": 1890, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:48.062235", + "step": 1890, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006900664884597063, + "timestamp": "2025-09-10 02:19:48.070068", + "step": 1891, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:48.101187", + "step": 1891, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006212836597114801, + "timestamp": "2025-09-10 02:19:48.128957", + "step": 1892, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:48.160628", + "step": 1892, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004362097941339016, + "timestamp": "2025-09-10 02:19:48.165615", + "step": 1893, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 19459015502528 + }, + "timestamp": "2025-09-10 02:19:48.221178", + "step": 1893, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012452795170247555, + "timestamp": "2025-09-10 02:19:48.244554", + "step": 1894, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:19:48.276367", + "step": 1894, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01033748872578144, + "timestamp": "2025-09-10 02:19:48.287260", + "step": 1895, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:48.318079", + "step": 1895, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016262022778391838, + "timestamp": "2025-09-10 02:19:48.345998", + "step": 1896, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:19:48.382681", + "step": 1896, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009804087691009045, + "timestamp": "2025-09-10 02:19:48.398120", + "step": 1897, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:48.429228", + "step": 1897, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0022602048702538013, + "timestamp": "2025-09-10 02:19:48.436055", + "step": 1898, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:48.472753", + "step": 1898, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0055809845216572285, + "timestamp": "2025-09-10 02:19:48.480563", + "step": 1899, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:48.518880", + "step": 1899, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007991933263838291, + "timestamp": "2025-09-10 02:19:48.551926", + "step": 1900, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:19:48.588105", + "step": 1900, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006761971395462751, + "timestamp": "2025-09-10 02:19:48.601170", + "step": 1901, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:48.633355", + "step": 1901, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002031755167990923, + "timestamp": "2025-09-10 02:19:48.645582", + "step": 1902, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:48.678384", + "step": 1902, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003453353885561228, + "timestamp": "2025-09-10 02:19:48.688607", + "step": 1903, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:19:48.743530", + "step": 1903, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004564212169498205, + "timestamp": "2025-09-10 02:19:48.778028", + "step": 1904, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:48.810511", + "step": 1904, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018804430961608887, + "timestamp": "2025-09-10 02:19:48.815823", + "step": 1905, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:19:48.847065", + "step": 1905, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012097448110580444, + "timestamp": "2025-09-10 02:19:48.851347", + "step": 1906, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:19:48.885945", + "step": 1906, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003606958081945777, + "timestamp": "2025-09-10 02:19:48.898477", + "step": 1907, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:48.933153", + "step": 1907, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0061880433931946754, + "timestamp": "2025-09-10 02:19:48.964240", + "step": 1908, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:19:48.996907", + "step": 1908, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004639564547687769, + "timestamp": "2025-09-10 02:19:48.999361", + "step": 1909, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:49.030532", + "step": 1909, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0022573911119252443, + "timestamp": "2025-09-10 02:19:49.037363", + "step": 1910, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:49.068175", + "step": 1910, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006169704254716635, + "timestamp": "2025-09-10 02:19:49.078299", + "step": 1911, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:19:59.318357", + "step": 1911, + "epoch": 1 + }, + { + "type": "pplx", + "content": 15748464.88131854, + "timestamp": "2025-09-10 02:19:59.321577", + "step": 1911, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:19:59.355005", + "step": 1911, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01734175719320774, + "timestamp": "2025-09-10 02:19:59.389250", + "step": 1912, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:59.431938", + "step": 1912, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001922283903695643, + "timestamp": "2025-09-10 02:19:59.436388", + "step": 1913, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:59.467171", + "step": 1913, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028175072744488716, + "timestamp": "2025-09-10 02:19:59.470920", + "step": 1914, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:59.502668", + "step": 1914, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016676677390933037, + "timestamp": "2025-09-10 02:19:59.508746", + "step": 1915, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:19:59.544225", + "step": 1915, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025236140936613083, + "timestamp": "2025-09-10 02:19:59.578750", + "step": 1916, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:19:59.612425", + "step": 1916, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012673401273787022, + "timestamp": "2025-09-10 02:19:59.614714", + "step": 1917, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:19:59.648493", + "step": 1917, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010366697795689106, + "timestamp": "2025-09-10 02:19:59.654277", + "step": 1918, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:19:59.686309", + "step": 1918, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02431515045464039, + "timestamp": "2025-09-10 02:19:59.695584", + "step": 1919, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:19:59.726935", + "step": 1919, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014513040892779827, + "timestamp": "2025-09-10 02:19:59.754412", + "step": 1920, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:19:59.786752", + "step": 1920, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006609838455915451, + "timestamp": "2025-09-10 02:19:59.791424", + "step": 1921, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:19:59.827048", + "step": 1921, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006101998034864664, + "timestamp": "2025-09-10 02:19:59.841049", + "step": 1922, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:19:59.873147", + "step": 1922, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.036406856030225754, + "timestamp": "2025-09-10 02:19:59.880180", + "step": 1923, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:19:59.912547", + "step": 1923, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034558676183223724, + "timestamp": "2025-09-10 02:19:59.937079", + "step": 1924, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:19:59.969338", + "step": 1924, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00515876105055213, + "timestamp": "2025-09-10 02:19:59.978570", + "step": 1925, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:00.013280", + "step": 1925, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014893017709255219, + "timestamp": "2025-09-10 02:20:00.016983", + "step": 1926, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:00.051875", + "step": 1926, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015396283939480782, + "timestamp": "2025-09-10 02:20:00.056401", + "step": 1927, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:00.095090", + "step": 1927, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013009021990001202, + "timestamp": "2025-09-10 02:20:00.119808", + "step": 1928, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:00.155475", + "step": 1928, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026162832975387573, + "timestamp": "2025-09-10 02:20:00.164772", + "step": 1929, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:00.203437", + "step": 1929, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01741965487599373, + "timestamp": "2025-09-10 02:20:00.215425", + "step": 1930, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:00.251965", + "step": 1930, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002141000237315893, + "timestamp": "2025-09-10 02:20:00.261627", + "step": 1931, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:00.303520", + "step": 1931, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038426872342824936, + "timestamp": "2025-09-10 02:20:00.334654", + "step": 1932, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:00.374816", + "step": 1932, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010318142361938953, + "timestamp": "2025-09-10 02:20:00.382382", + "step": 1933, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:00.423403", + "step": 1933, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026648273691534996, + "timestamp": "2025-09-10 02:20:00.433237", + "step": 1934, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:00.476849", + "step": 1934, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04163838550448418, + "timestamp": "2025-09-10 02:20:00.483471", + "step": 1935, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:00.516025", + "step": 1935, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006355960853397846, + "timestamp": "2025-09-10 02:20:00.544171", + "step": 1936, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:00.577147", + "step": 1936, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002842534566298127, + "timestamp": "2025-09-10 02:20:00.586499", + "step": 1937, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:00.617329", + "step": 1937, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0015678989002481103, + "timestamp": "2025-09-10 02:20:00.629113", + "step": 1938, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:00.660175", + "step": 1938, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016521496698260307, + "timestamp": "2025-09-10 02:20:00.666949", + "step": 1939, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:00.699137", + "step": 1939, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0046984353102743626, + "timestamp": "2025-09-10 02:20:00.727048", + "step": 1940, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:00.758090", + "step": 1940, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004027045797556639, + "timestamp": "2025-09-10 02:20:00.762685", + "step": 1941, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:00.793799", + "step": 1941, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02891431376338005, + "timestamp": "2025-09-10 02:20:00.800863", + "step": 1942, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:00.834601", + "step": 1942, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016804974526166916, + "timestamp": "2025-09-10 02:20:00.841617", + "step": 1943, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:00.873058", + "step": 1943, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005456176120787859, + "timestamp": "2025-09-10 02:20:00.904001", + "step": 1944, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:00.935753", + "step": 1944, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002787849633023143, + "timestamp": "2025-09-10 02:20:00.940304", + "step": 1945, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:00.972908", + "step": 1945, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021894289180636406, + "timestamp": "2025-09-10 02:20:00.983091", + "step": 1946, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:01.014659", + "step": 1946, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008030838333070278, + "timestamp": "2025-09-10 02:20:01.026613", + "step": 1947, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:01.058472", + "step": 1947, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029469074681401253, + "timestamp": "2025-09-10 02:20:01.086618", + "step": 1948, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:01.117184", + "step": 1948, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004061527084559202, + "timestamp": "2025-09-10 02:20:01.119788", + "step": 1949, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:01.152023", + "step": 1949, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010181749239563942, + "timestamp": "2025-09-10 02:20:01.158883", + "step": 1950, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:01.189424", + "step": 1950, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021814599633216858, + "timestamp": "2025-09-10 02:20:01.193951", + "step": 1951, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:01.228797", + "step": 1951, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009718448854982853, + "timestamp": "2025-09-10 02:20:01.257497", + "step": 1952, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:01.289417", + "step": 1952, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005462608300149441, + "timestamp": "2025-09-10 02:20:01.296372", + "step": 1953, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:01.327704", + "step": 1953, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002389210741966963, + "timestamp": "2025-09-10 02:20:01.334816", + "step": 1954, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:01.365920", + "step": 1954, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0015777194639667869, + "timestamp": "2025-09-10 02:20:01.375825", + "step": 1955, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:01.406777", + "step": 1955, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007745720446109772, + "timestamp": "2025-09-10 02:20:01.435074", + "step": 1956, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:20:01.468323", + "step": 1956, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009010471403598785, + "timestamp": "2025-09-10 02:20:01.481413", + "step": 1957, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:01.514875", + "step": 1957, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006604184862226248, + "timestamp": "2025-09-10 02:20:01.528177", + "step": 1958, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:01.561394", + "step": 1958, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0016930067213252187, + "timestamp": "2025-09-10 02:20:01.569093", + "step": 1959, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:01.601187", + "step": 1959, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016999879851937294, + "timestamp": "2025-09-10 02:20:01.633933", + "step": 1960, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:01.664733", + "step": 1960, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018128232331946492, + "timestamp": "2025-09-10 02:20:01.669447", + "step": 1961, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:01.700432", + "step": 1961, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0029406710527837276, + "timestamp": "2025-09-10 02:20:01.708110", + "step": 1962, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:01.740014", + "step": 1962, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011884416453540325, + "timestamp": "2025-09-10 02:20:01.746627", + "step": 1963, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:01.777969", + "step": 1963, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011890656314790249, + "timestamp": "2025-09-10 02:20:01.808641", + "step": 1964, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:01.842975", + "step": 1964, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012111729942262173, + "timestamp": "2025-09-10 02:20:01.847753", + "step": 1965, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:01.878753", + "step": 1965, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00835806131362915, + "timestamp": "2025-09-10 02:20:01.881443", + "step": 1966, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:01.912407", + "step": 1966, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01073089987039566, + "timestamp": "2025-09-10 02:20:01.924880", + "step": 1967, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:01.956058", + "step": 1967, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0013615777716040611, + "timestamp": "2025-09-10 02:20:01.980552", + "step": 1968, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:02.011121", + "step": 1968, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026787450537085533, + "timestamp": "2025-09-10 02:20:02.015414", + "step": 1969, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:02.046192", + "step": 1969, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024200987070798874, + "timestamp": "2025-09-10 02:20:02.052835", + "step": 1970, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:02.095175", + "step": 1970, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022902294993400574, + "timestamp": "2025-09-10 02:20:02.097512", + "step": 1971, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:02.129584", + "step": 1971, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01670275256037712, + "timestamp": "2025-09-10 02:20:02.162362", + "step": 1972, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:20:02.194796", + "step": 1972, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004363041836768389, + "timestamp": "2025-09-10 02:20:02.196975", + "step": 1973, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:20:02.233718", + "step": 1973, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007900647819042206, + "timestamp": "2025-09-10 02:20:02.247731", + "step": 1974, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:02.279427", + "step": 1974, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023129496723413467, + "timestamp": "2025-09-10 02:20:02.283217", + "step": 1975, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:02.314827", + "step": 1975, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0061828577890992165, + "timestamp": "2025-09-10 02:20:02.343125", + "step": 1976, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:20:02.380946", + "step": 1976, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011836091056466103, + "timestamp": "2025-09-10 02:20:02.396599", + "step": 1977, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:02.427786", + "step": 1977, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.054816387593746185, + "timestamp": "2025-09-10 02:20:02.435329", + "step": 1978, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:02.467061", + "step": 1978, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0033305014949291945, + "timestamp": "2025-09-10 02:20:02.477014", + "step": 1979, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:02.511551", + "step": 1979, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001081528840586543, + "timestamp": "2025-09-10 02:20:02.539830", + "step": 1980, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:02.580188", + "step": 1980, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03720958158373833, + "timestamp": "2025-09-10 02:20:02.585005", + "step": 1981, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:02.627441", + "step": 1981, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017248960211873055, + "timestamp": "2025-09-10 02:20:02.631086", + "step": 1982, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 13289167064320 + }, + "timestamp": "2025-09-10 02:20:02.673598", + "step": 1982, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006337800528854132, + "timestamp": "2025-09-10 02:20:02.689914", + "step": 1983, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:02.724966", + "step": 1983, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.038486216217279434, + "timestamp": "2025-09-10 02:20:02.751861", + "step": 1984, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:02.783687", + "step": 1984, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01467389427125454, + "timestamp": "2025-09-10 02:20:02.788693", + "step": 1985, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:02.819594", + "step": 1985, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01384007465094328, + "timestamp": "2025-09-10 02:20:02.823988", + "step": 1986, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:02.855334", + "step": 1986, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029948865994811058, + "timestamp": "2025-09-10 02:20:02.862195", + "step": 1987, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:02.893690", + "step": 1987, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007479586638510227, + "timestamp": "2025-09-10 02:20:02.921136", + "step": 1988, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:02.951996", + "step": 1988, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00857719499617815, + "timestamp": "2025-09-10 02:20:02.956381", + "step": 1989, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:02.988249", + "step": 1989, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012705625034868717, + "timestamp": "2025-09-10 02:20:02.998080", + "step": 1990, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:03.030003", + "step": 1990, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015617369674146175, + "timestamp": "2025-09-10 02:20:03.036771", + "step": 1991, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:03.068372", + "step": 1991, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014881722629070282, + "timestamp": "2025-09-10 02:20:03.101574", + "step": 1992, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:03.132743", + "step": 1992, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012684579007327557, + "timestamp": "2025-09-10 02:20:03.135471", + "step": 1993, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:03.167036", + "step": 1993, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00392954470589757, + "timestamp": "2025-09-10 02:20:03.173954", + "step": 1994, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:03.206365", + "step": 1994, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003335257526487112, + "timestamp": "2025-09-10 02:20:03.213851", + "step": 1995, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:03.244516", + "step": 1995, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014795198105275631, + "timestamp": "2025-09-10 02:20:03.269528", + "step": 1996, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:03.300340", + "step": 1996, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003911779262125492, + "timestamp": "2025-09-10 02:20:03.305613", + "step": 1997, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:03.335757", + "step": 1997, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0010702766012400389, + "timestamp": "2025-09-10 02:20:03.346036", + "step": 1998, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:03.378598", + "step": 1998, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006708834785968065, + "timestamp": "2025-09-10 02:20:03.383112", + "step": 1999, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:03.413851", + "step": 1999, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018123431131243706, + "timestamp": "2025-09-10 02:20:03.446982", + "step": 2000, + "epoch": 1 + }, + { + "type": "info", + "content": "Checkpoint saved at step 2000", + "timestamp": "2025-09-10 02:20:08.143067", + "step": 2000, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:08.176414", + "step": 2000, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013308617286384106, + "timestamp": "2025-09-10 02:20:08.184018", + "step": 2001, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:08.216585", + "step": 2001, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009740256704390049, + "timestamp": "2025-09-10 02:20:08.220053", + "step": 2002, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:08.255010", + "step": 2002, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002783420728519559, + "timestamp": "2025-09-10 02:20:08.259076", + "step": 2003, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:08.290467", + "step": 2003, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016279999166727066, + "timestamp": "2025-09-10 02:20:08.317646", + "step": 2004, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:08.350363", + "step": 2004, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002739792922511697, + "timestamp": "2025-09-10 02:20:08.360066", + "step": 2005, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:08.391551", + "step": 2005, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0065587302669882774, + "timestamp": "2025-09-10 02:20:08.398108", + "step": 2006, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:08.429557", + "step": 2006, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03201238065958023, + "timestamp": "2025-09-10 02:20:08.433423", + "step": 2007, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:08.465425", + "step": 2007, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0022500918712466955, + "timestamp": "2025-09-10 02:20:08.496454", + "step": 2008, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:08.528796", + "step": 2008, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04503735154867172, + "timestamp": "2025-09-10 02:20:08.535233", + "step": 2009, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:08.566556", + "step": 2009, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0013476565945893526, + "timestamp": "2025-09-10 02:20:08.574229", + "step": 2010, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:20:08.606641", + "step": 2010, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0042086499743163586, + "timestamp": "2025-09-10 02:20:08.608893", + "step": 2011, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:08.640311", + "step": 2011, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006380919367074966, + "timestamp": "2025-09-10 02:20:08.667889", + "step": 2012, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:08.699255", + "step": 2012, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02008945681154728, + "timestamp": "2025-09-10 02:20:08.703452", + "step": 2013, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:08.734085", + "step": 2013, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0637151375412941, + "timestamp": "2025-09-10 02:20:08.736305", + "step": 2014, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:08.767522", + "step": 2014, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0012976779835298657, + "timestamp": "2025-09-10 02:20:08.771683", + "step": 2015, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:08.802724", + "step": 2015, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013720971532166004, + "timestamp": "2025-09-10 02:20:08.831025", + "step": 2016, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:08.862613", + "step": 2016, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017362453043460846, + "timestamp": "2025-09-10 02:20:08.865166", + "step": 2017, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:08.896836", + "step": 2017, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005491399206221104, + "timestamp": "2025-09-10 02:20:08.904248", + "step": 2018, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:08.935110", + "step": 2018, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01876024715602398, + "timestamp": "2025-09-10 02:20:08.939187", + "step": 2019, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:08.972204", + "step": 2019, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009473063051700592, + "timestamp": "2025-09-10 02:20:08.996335", + "step": 2020, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:09.029022", + "step": 2020, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01576576568186283, + "timestamp": "2025-09-10 02:20:09.036361", + "step": 2021, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:20:09.071077", + "step": 2021, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030170930549502373, + "timestamp": "2025-09-10 02:20:09.084825", + "step": 2022, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:20:09.120147", + "step": 2022, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02932755835354328, + "timestamp": "2025-09-10 02:20:09.133836", + "step": 2023, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:09.165574", + "step": 2023, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0012282658135518432, + "timestamp": "2025-09-10 02:20:09.193653", + "step": 2024, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:20:09.226223", + "step": 2024, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006276692263782024, + "timestamp": "2025-09-10 02:20:09.239234", + "step": 2025, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:09.270967", + "step": 2025, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011697587557137012, + "timestamp": "2025-09-10 02:20:09.278708", + "step": 2026, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:09.311121", + "step": 2026, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007251562085002661, + "timestamp": "2025-09-10 02:20:09.318243", + "step": 2027, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:09.349398", + "step": 2027, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00808743666857481, + "timestamp": "2025-09-10 02:20:09.377091", + "step": 2028, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:09.408086", + "step": 2028, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0025386668276041746, + "timestamp": "2025-09-10 02:20:09.410319", + "step": 2029, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:09.441351", + "step": 2029, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.059052709490060806, + "timestamp": "2025-09-10 02:20:09.443589", + "step": 2030, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:09.476520", + "step": 2030, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01944047398865223, + "timestamp": "2025-09-10 02:20:09.484150", + "step": 2031, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:09.518094", + "step": 2031, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01776472106575966, + "timestamp": "2025-09-10 02:20:09.552272", + "step": 2032, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:09.586355", + "step": 2032, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0041867028921842575, + "timestamp": "2025-09-10 02:20:09.591297", + "step": 2033, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:09.624274", + "step": 2033, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016866056248545647, + "timestamp": "2025-09-10 02:20:09.636312", + "step": 2034, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:09.667931", + "step": 2034, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009813317097723484, + "timestamp": "2025-09-10 02:20:09.674821", + "step": 2035, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 15662185694400 + }, + "timestamp": "2025-09-10 02:20:09.720805", + "step": 2035, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009570365771651268, + "timestamp": "2025-09-10 02:20:09.760848", + "step": 2036, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:09.793278", + "step": 2036, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011103704571723938, + "timestamp": "2025-09-10 02:20:09.797984", + "step": 2037, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:09.832433", + "step": 2037, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016965234652161598, + "timestamp": "2025-09-10 02:20:09.845793", + "step": 2038, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:09.877310", + "step": 2038, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012328843586146832, + "timestamp": "2025-09-10 02:20:09.883972", + "step": 2039, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:09.915267", + "step": 2039, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01965012215077877, + "timestamp": "2025-09-10 02:20:09.943494", + "step": 2040, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:09.975189", + "step": 2040, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015359156765043736, + "timestamp": "2025-09-10 02:20:09.979739", + "step": 2041, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:20:10.015411", + "step": 2041, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025193924084305763, + "timestamp": "2025-09-10 02:20:10.029190", + "step": 2042, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:10.063910", + "step": 2042, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006153655704110861, + "timestamp": "2025-09-10 02:20:10.070617", + "step": 2043, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:20:10.109571", + "step": 2043, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003595164744183421, + "timestamp": "2025-09-10 02:20:10.146348", + "step": 2044, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:10.179076", + "step": 2044, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002315750578418374, + "timestamp": "2025-09-10 02:20:10.191421", + "step": 2045, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:10.222927", + "step": 2045, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01863052323460579, + "timestamp": "2025-09-10 02:20:10.227211", + "step": 2046, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:10.266149", + "step": 2046, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02388039045035839, + "timestamp": "2025-09-10 02:20:10.270375", + "step": 2047, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:10.302182", + "step": 2047, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014288315549492836, + "timestamp": "2025-09-10 02:20:10.332651", + "step": 2048, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:10.364502", + "step": 2048, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02336038462817669, + "timestamp": "2025-09-10 02:20:10.366451", + "step": 2049, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:10.399108", + "step": 2049, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03333750367164612, + "timestamp": "2025-09-10 02:20:10.406210", + "step": 2050, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:10.439798", + "step": 2050, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03487107530236244, + "timestamp": "2025-09-10 02:20:10.453116", + "step": 2051, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:10.488181", + "step": 2051, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008551175706088543, + "timestamp": "2025-09-10 02:20:10.516117", + "step": 2052, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:10.560118", + "step": 2052, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.000608120986726135, + "timestamp": "2025-09-10 02:20:10.565386", + "step": 2053, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:10.603975", + "step": 2053, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0034163747914135456, + "timestamp": "2025-09-10 02:20:10.614377", + "step": 2054, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:10.651410", + "step": 2054, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008907620795071125, + "timestamp": "2025-09-10 02:20:10.656895", + "step": 2055, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:10.692828", + "step": 2055, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022446779534220695, + "timestamp": "2025-09-10 02:20:10.720333", + "step": 2056, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:10.756370", + "step": 2056, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021804099902510643, + "timestamp": "2025-09-10 02:20:10.765348", + "step": 2057, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:10.796437", + "step": 2057, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004856002051383257, + "timestamp": "2025-09-10 02:20:10.806145", + "step": 2058, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:20:21.241490", + "step": 2058, + "epoch": 1 + }, + { + "type": "pplx", + "content": 17864993.387790058, + "timestamp": "2025-09-10 02:20:21.246430", + "step": 2058, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:21.279535", + "step": 2058, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02261553891003132, + "timestamp": "2025-09-10 02:20:21.287744", + "step": 2059, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:21.323575", + "step": 2059, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007157010026276112, + "timestamp": "2025-09-10 02:20:21.357682", + "step": 2060, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:21.389763", + "step": 2060, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026648053899407387, + "timestamp": "2025-09-10 02:20:21.398755", + "step": 2061, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:21.430854", + "step": 2061, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006498201750218868, + "timestamp": "2025-09-10 02:20:21.437552", + "step": 2062, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:21.469463", + "step": 2062, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024175258353352547, + "timestamp": "2025-09-10 02:20:21.476964", + "step": 2063, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:21.509159", + "step": 2063, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007673321757465601, + "timestamp": "2025-09-10 02:20:21.537240", + "step": 2064, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:21.569051", + "step": 2064, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005579050164669752, + "timestamp": "2025-09-10 02:20:21.571289", + "step": 2065, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:21.603339", + "step": 2065, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010995094664394855, + "timestamp": "2025-09-10 02:20:21.610701", + "step": 2066, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:21.643363", + "step": 2066, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03568677604198456, + "timestamp": "2025-09-10 02:20:21.655675", + "step": 2067, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:21.687456", + "step": 2067, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00576269906014204, + "timestamp": "2025-09-10 02:20:21.720291", + "step": 2068, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:21.753413", + "step": 2068, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014195759780704975, + "timestamp": "2025-09-10 02:20:21.758422", + "step": 2069, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:21.792123", + "step": 2069, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0028361105360090733, + "timestamp": "2025-09-10 02:20:21.802984", + "step": 2070, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:21.837230", + "step": 2070, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003535019000992179, + "timestamp": "2025-09-10 02:20:21.844518", + "step": 2071, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:21.875162", + "step": 2071, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006695587653666735, + "timestamp": "2025-09-10 02:20:21.903471", + "step": 2072, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:21.933981", + "step": 2072, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017478538677096367, + "timestamp": "2025-09-10 02:20:21.938764", + "step": 2073, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:21.969929", + "step": 2073, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005236539524048567, + "timestamp": "2025-09-10 02:20:21.982093", + "step": 2074, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:20:22.016908", + "step": 2074, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020143600180745125, + "timestamp": "2025-09-10 02:20:22.030586", + "step": 2075, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:20:22.061615", + "step": 2075, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017758900299668312, + "timestamp": "2025-09-10 02:20:22.085241", + "step": 2076, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:20:22.115780", + "step": 2076, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014581401832401752, + "timestamp": "2025-09-10 02:20:22.118185", + "step": 2077, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:22.150212", + "step": 2077, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038522446993738413, + "timestamp": "2025-09-10 02:20:22.157895", + "step": 2078, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:22.191209", + "step": 2078, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006819657050073147, + "timestamp": "2025-09-10 02:20:22.198954", + "step": 2079, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 15187581968384 + }, + "timestamp": "2025-09-10 02:20:22.241658", + "step": 2079, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0016683044377714396, + "timestamp": "2025-09-10 02:20:22.280290", + "step": 2080, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:22.311231", + "step": 2080, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006357488688081503, + "timestamp": "2025-09-10 02:20:22.316660", + "step": 2081, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:22.347407", + "step": 2081, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026342766359448433, + "timestamp": "2025-09-10 02:20:22.354544", + "step": 2082, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:22.385455", + "step": 2082, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021110793575644493, + "timestamp": "2025-09-10 02:20:22.397683", + "step": 2083, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:22.431408", + "step": 2083, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007887489162385464, + "timestamp": "2025-09-10 02:20:22.456451", + "step": 2084, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:20:22.493285", + "step": 2084, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01206361036747694, + "timestamp": "2025-09-10 02:20:22.508412", + "step": 2085, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:22.539607", + "step": 2085, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0012552806874737144, + "timestamp": "2025-09-10 02:20:22.546422", + "step": 2086, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:22.578638", + "step": 2086, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008134759962558746, + "timestamp": "2025-09-10 02:20:22.585542", + "step": 2087, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:20:22.620360", + "step": 2087, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0027143345214426517, + "timestamp": "2025-09-10 02:20:22.655226", + "step": 2088, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:22.688088", + "step": 2088, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0388091541826725, + "timestamp": "2025-09-10 02:20:22.690414", + "step": 2089, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:22.723556", + "step": 2089, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0491640567779541, + "timestamp": "2025-09-10 02:20:22.734527", + "step": 2090, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:20:22.772601", + "step": 2090, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006399436388164759, + "timestamp": "2025-09-10 02:20:22.788192", + "step": 2091, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:22.821167", + "step": 2091, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011510169133543968, + "timestamp": "2025-09-10 02:20:22.845097", + "step": 2092, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:20:22.881376", + "step": 2092, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.07845025509595871, + "timestamp": "2025-09-10 02:20:22.896561", + "step": 2093, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:22.929539", + "step": 2093, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.026220111176371574, + "timestamp": "2025-09-10 02:20:22.936638", + "step": 2094, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:22.968347", + "step": 2094, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02482834830880165, + "timestamp": "2025-09-10 02:20:22.978699", + "step": 2095, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:23.010557", + "step": 2095, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017762552946805954, + "timestamp": "2025-09-10 02:20:23.038344", + "step": 2096, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:23.071589", + "step": 2096, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0069184741005301476, + "timestamp": "2025-09-10 02:20:23.078394", + "step": 2097, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:23.110349", + "step": 2097, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008609072305262089, + "timestamp": "2025-09-10 02:20:23.116652", + "step": 2098, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:23.147965", + "step": 2098, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001422119210474193, + "timestamp": "2025-09-10 02:20:23.159655", + "step": 2099, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:23.193309", + "step": 2099, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006961984094232321, + "timestamp": "2025-09-10 02:20:23.224650", + "step": 2100, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:23.255497", + "step": 2100, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0065904962830245495, + "timestamp": "2025-09-10 02:20:23.257675", + "step": 2101, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:23.288200", + "step": 2101, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004559192340821028, + "timestamp": "2025-09-10 02:20:23.292701", + "step": 2102, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:23.324242", + "step": 2102, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01074813213199377, + "timestamp": "2025-09-10 02:20:23.331808", + "step": 2103, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:23.363551", + "step": 2103, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017620306462049484, + "timestamp": "2025-09-10 02:20:23.395347", + "step": 2104, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:23.427142", + "step": 2104, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008062370121479034, + "timestamp": "2025-09-10 02:20:23.439784", + "step": 2105, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:23.471788", + "step": 2105, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.029557526111602783, + "timestamp": "2025-09-10 02:20:23.476036", + "step": 2106, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:23.510903", + "step": 2106, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014713338576257229, + "timestamp": "2025-09-10 02:20:23.524282", + "step": 2107, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:23.555121", + "step": 2107, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011615300551056862, + "timestamp": "2025-09-10 02:20:23.580582", + "step": 2108, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:23.611130", + "step": 2108, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0011783967493101954, + "timestamp": "2025-09-10 02:20:23.619196", + "step": 2109, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:23.651276", + "step": 2109, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006283028516918421, + "timestamp": "2025-09-10 02:20:23.662054", + "step": 2110, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:23.695199", + "step": 2110, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015238684602081776, + "timestamp": "2025-09-10 02:20:23.708698", + "step": 2111, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:23.740097", + "step": 2111, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0074634556658566, + "timestamp": "2025-09-10 02:20:23.770592", + "step": 2112, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:23.805506", + "step": 2112, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011353401467204094, + "timestamp": "2025-09-10 02:20:23.809323", + "step": 2113, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:23.843406", + "step": 2113, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021240845322608948, + "timestamp": "2025-09-10 02:20:23.848788", + "step": 2114, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:23.883014", + "step": 2114, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018865080550312996, + "timestamp": "2025-09-10 02:20:23.888498", + "step": 2115, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:23.922213", + "step": 2115, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019352329894900322, + "timestamp": "2025-09-10 02:20:23.949465", + "step": 2116, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:23.983621", + "step": 2116, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01287752389907837, + "timestamp": "2025-09-10 02:20:23.986675", + "step": 2117, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:24.020402", + "step": 2117, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03353200852870941, + "timestamp": "2025-09-10 02:20:24.023711", + "step": 2118, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:20:24.057404", + "step": 2118, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0233648419380188, + "timestamp": "2025-09-10 02:20:24.059721", + "step": 2119, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:24.092936", + "step": 2119, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03482295200228691, + "timestamp": "2025-09-10 02:20:24.125226", + "step": 2120, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:24.156601", + "step": 2120, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02343512885272503, + "timestamp": "2025-09-10 02:20:24.158914", + "step": 2121, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:24.190199", + "step": 2121, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022608637809753418, + "timestamp": "2025-09-10 02:20:24.197547", + "step": 2122, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:24.228337", + "step": 2122, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0130180474370718, + "timestamp": "2025-09-10 02:20:24.238347", + "step": 2123, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:24.269288", + "step": 2123, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014483482576906681, + "timestamp": "2025-09-10 02:20:24.296619", + "step": 2124, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:24.328265", + "step": 2124, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.05478040128946304, + "timestamp": "2025-09-10 02:20:24.336772", + "step": 2125, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:24.367265", + "step": 2125, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00959568191319704, + "timestamp": "2025-09-10 02:20:24.374201", + "step": 2126, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:24.406908", + "step": 2126, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012538508512079716, + "timestamp": "2025-09-10 02:20:24.411278", + "step": 2127, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:24.442470", + "step": 2127, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0026257862336933613, + "timestamp": "2025-09-10 02:20:24.473702", + "step": 2128, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:24.503894", + "step": 2128, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018345599994063377, + "timestamp": "2025-09-10 02:20:24.506139", + "step": 2129, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:24.537211", + "step": 2129, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014367531053721905, + "timestamp": "2025-09-10 02:20:24.544118", + "step": 2130, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:24.583299", + "step": 2130, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0052671851590275764, + "timestamp": "2025-09-10 02:20:24.587542", + "step": 2131, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:24.628092", + "step": 2131, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017293930053710938, + "timestamp": "2025-09-10 02:20:24.659286", + "step": 2132, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:24.691742", + "step": 2132, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0051933168433606625, + "timestamp": "2025-09-10 02:20:24.702182", + "step": 2133, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:24.733405", + "step": 2133, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008713253773748875, + "timestamp": "2025-09-10 02:20:24.740295", + "step": 2134, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:24.771556", + "step": 2134, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010111020877957344, + "timestamp": "2025-09-10 02:20:24.783335", + "step": 2135, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:20:24.821841", + "step": 2135, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014230245724320412, + "timestamp": "2025-09-10 02:20:24.858442", + "step": 2136, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:20:24.892148", + "step": 2136, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013335539028048515, + "timestamp": "2025-09-10 02:20:24.905294", + "step": 2137, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:24.936397", + "step": 2137, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018712077289819717, + "timestamp": "2025-09-10 02:20:24.943181", + "step": 2138, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:24.976717", + "step": 2138, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028135867789387703, + "timestamp": "2025-09-10 02:20:24.990073", + "step": 2139, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:25.021402", + "step": 2139, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014833973720669746, + "timestamp": "2025-09-10 02:20:25.049743", + "step": 2140, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:25.079948", + "step": 2140, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005817878060042858, + "timestamp": "2025-09-10 02:20:25.082067", + "step": 2141, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:25.112835", + "step": 2141, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00543214799836278, + "timestamp": "2025-09-10 02:20:25.123106", + "step": 2142, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:25.153958", + "step": 2142, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0051437122747302055, + "timestamp": "2025-09-10 02:20:25.164268", + "step": 2143, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:25.198938", + "step": 2143, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004253858234733343, + "timestamp": "2025-09-10 02:20:25.233271", + "step": 2144, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:25.264621", + "step": 2144, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015032351948320866, + "timestamp": "2025-09-10 02:20:25.269699", + "step": 2145, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:25.300931", + "step": 2145, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010134616866707802, + "timestamp": "2025-09-10 02:20:25.313334", + "step": 2146, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:25.344866", + "step": 2146, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003961100243031979, + "timestamp": "2025-09-10 02:20:25.352268", + "step": 2147, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:25.383479", + "step": 2147, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014108018949627876, + "timestamp": "2025-09-10 02:20:25.411763", + "step": 2148, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:25.442846", + "step": 2148, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0111985569819808, + "timestamp": "2025-09-10 02:20:25.447948", + "step": 2149, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:20:25.477822", + "step": 2149, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02083497866988182, + "timestamp": "2025-09-10 02:20:25.480008", + "step": 2150, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:20:25.514467", + "step": 2150, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005785502027720213, + "timestamp": "2025-09-10 02:20:25.528509", + "step": 2151, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:25.559732", + "step": 2151, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008959316648542881, + "timestamp": "2025-09-10 02:20:25.587452", + "step": 2152, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:25.618341", + "step": 2152, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008241984061896801, + "timestamp": "2025-09-10 02:20:25.628840", + "step": 2153, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:25.659730", + "step": 2153, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006875708233565092, + "timestamp": "2025-09-10 02:20:25.667335", + "step": 2154, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:25.701679", + "step": 2154, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0037335853558033705, + "timestamp": "2025-09-10 02:20:25.709348", + "step": 2155, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:25.740435", + "step": 2155, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020355254411697388, + "timestamp": "2025-09-10 02:20:25.768161", + "step": 2156, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:20:25.801811", + "step": 2156, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009400454349815845, + "timestamp": "2025-09-10 02:20:25.814913", + "step": 2157, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:25.846853", + "step": 2157, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006258614361286163, + "timestamp": "2025-09-10 02:20:25.853851", + "step": 2158, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:25.884388", + "step": 2158, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008656726218760014, + "timestamp": "2025-09-10 02:20:25.891425", + "step": 2159, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:25.921831", + "step": 2159, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004763355012983084, + "timestamp": "2025-09-10 02:20:25.949770", + "step": 2160, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:25.981370", + "step": 2160, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008526794612407684, + "timestamp": "2025-09-10 02:20:25.991080", + "step": 2161, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:26.022544", + "step": 2161, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.055472858250141144, + "timestamp": "2025-09-10 02:20:26.029601", + "step": 2162, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:26.061022", + "step": 2162, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0049760532565414906, + "timestamp": "2025-09-10 02:20:26.065071", + "step": 2163, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:20:26.100965", + "step": 2163, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02341938205063343, + "timestamp": "2025-09-10 02:20:26.135653", + "step": 2164, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:26.167456", + "step": 2164, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01208476722240448, + "timestamp": "2025-09-10 02:20:26.172621", + "step": 2165, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:26.204912", + "step": 2165, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005906891077756882, + "timestamp": "2025-09-10 02:20:26.215000", + "step": 2166, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:26.246522", + "step": 2166, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001993507379665971, + "timestamp": "2025-09-10 02:20:26.256675", + "step": 2167, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:26.288004", + "step": 2167, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0028882776387035847, + "timestamp": "2025-09-10 02:20:26.319213", + "step": 2168, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:26.350083", + "step": 2168, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018221037462353706, + "timestamp": "2025-09-10 02:20:26.352302", + "step": 2169, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:26.383804", + "step": 2169, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012003413401544094, + "timestamp": "2025-09-10 02:20:26.390523", + "step": 2170, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:26.421987", + "step": 2170, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005830215755850077, + "timestamp": "2025-09-10 02:20:26.432042", + "step": 2171, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:26.463153", + "step": 2171, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025055332109332085, + "timestamp": "2025-09-10 02:20:26.490728", + "step": 2172, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:26.522171", + "step": 2172, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020759152248501778, + "timestamp": "2025-09-10 02:20:26.527153", + "step": 2173, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:26.558543", + "step": 2173, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0036895188968628645, + "timestamp": "2025-09-10 02:20:26.565459", + "step": 2174, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:26.599427", + "step": 2174, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021146830171346664, + "timestamp": "2025-09-10 02:20:26.610450", + "step": 2175, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:26.642089", + "step": 2175, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03294152021408081, + "timestamp": "2025-09-10 02:20:26.675459", + "step": 2176, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:26.707514", + "step": 2176, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028937892988324165, + "timestamp": "2025-09-10 02:20:26.709724", + "step": 2177, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:26.741051", + "step": 2177, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005948805715888739, + "timestamp": "2025-09-10 02:20:26.743695", + "step": 2178, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:26.775324", + "step": 2178, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014749663881957531, + "timestamp": "2025-09-10 02:20:26.778142", + "step": 2179, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:26.814127", + "step": 2179, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022330453619360924, + "timestamp": "2025-09-10 02:20:26.848408", + "step": 2180, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:26.880605", + "step": 2180, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009073344990611076, + "timestamp": "2025-09-10 02:20:26.885273", + "step": 2181, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:26.917146", + "step": 2181, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00725422753021121, + "timestamp": "2025-09-10 02:20:26.924616", + "step": 2182, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:26.955341", + "step": 2182, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03919028118252754, + "timestamp": "2025-09-10 02:20:26.959402", + "step": 2183, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:26.990765", + "step": 2183, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03313456103205681, + "timestamp": "2025-09-10 02:20:27.021647", + "step": 2184, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:27.052764", + "step": 2184, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.023840798065066338, + "timestamp": "2025-09-10 02:20:27.055205", + "step": 2185, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:27.087124", + "step": 2185, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008154499344527721, + "timestamp": "2025-09-10 02:20:27.091545", + "step": 2186, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:20:27.125598", + "step": 2186, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021896088495850563, + "timestamp": "2025-09-10 02:20:27.139435", + "step": 2187, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:27.170364", + "step": 2187, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02253473922610283, + "timestamp": "2025-09-10 02:20:27.198062", + "step": 2188, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:27.229184", + "step": 2188, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020850541070103645, + "timestamp": "2025-09-10 02:20:27.231530", + "step": 2189, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:27.263642", + "step": 2189, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01595452055335045, + "timestamp": "2025-09-10 02:20:27.271524", + "step": 2190, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:20:27.313229", + "step": 2190, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014829051680862904, + "timestamp": "2025-09-10 02:20:27.330363", + "step": 2191, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:27.361318", + "step": 2191, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004625355359166861, + "timestamp": "2025-09-10 02:20:27.385169", + "step": 2192, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:27.418130", + "step": 2192, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004544160328805447, + "timestamp": "2025-09-10 02:20:27.422481", + "step": 2193, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:27.454944", + "step": 2193, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004428909160196781, + "timestamp": "2025-09-10 02:20:27.467282", + "step": 2194, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:27.499428", + "step": 2194, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002593469340354204, + "timestamp": "2025-09-10 02:20:27.506749", + "step": 2195, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:27.538937", + "step": 2195, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0045441207475960255, + "timestamp": "2025-09-10 02:20:27.570800", + "step": 2196, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:27.603970", + "step": 2196, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0163312666118145, + "timestamp": "2025-09-10 02:20:27.606468", + "step": 2197, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:27.638454", + "step": 2197, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008555804379284382, + "timestamp": "2025-09-10 02:20:27.641211", + "step": 2198, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:20:27.672756", + "step": 2198, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008500153198838234, + "timestamp": "2025-09-10 02:20:27.675218", + "step": 2199, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:27.707569", + "step": 2199, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006550853606313467, + "timestamp": "2025-09-10 02:20:27.732976", + "step": 2200, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:27.764385", + "step": 2200, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013699221424758434, + "timestamp": "2025-09-10 02:20:27.766855", + "step": 2201, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:27.797602", + "step": 2201, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008543393574655056, + "timestamp": "2025-09-10 02:20:27.805319", + "step": 2202, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:27.837410", + "step": 2202, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01858612895011902, + "timestamp": "2025-09-10 02:20:27.844392", + "step": 2203, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:27.875489", + "step": 2203, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01539852935820818, + "timestamp": "2025-09-10 02:20:27.900629", + "step": 2204, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:27.931630", + "step": 2204, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0037464885972440243, + "timestamp": "2025-09-10 02:20:27.936471", + "step": 2205, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:20:38.286390", + "step": 2205, + "epoch": 1 + }, + { + "type": "pplx", + "content": 17307864.411721557, + "timestamp": "2025-09-10 02:20:38.289200", + "step": 2205, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:38.319658", + "step": 2205, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03866380453109741, + "timestamp": "2025-09-10 02:20:38.330198", + "step": 2206, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:38.363594", + "step": 2206, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0060219233855605125, + "timestamp": "2025-09-10 02:20:38.370493", + "step": 2207, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:38.402011", + "step": 2207, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01440694835036993, + "timestamp": "2025-09-10 02:20:38.433053", + "step": 2208, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:38.465167", + "step": 2208, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01927160657942295, + "timestamp": "2025-09-10 02:20:38.469613", + "step": 2209, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:38.500179", + "step": 2209, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011063886806368828, + "timestamp": "2025-09-10 02:20:38.507667", + "step": 2210, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:38.538872", + "step": 2210, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01112239807844162, + "timestamp": "2025-09-10 02:20:38.546466", + "step": 2211, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:38.578628", + "step": 2211, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007352620828896761, + "timestamp": "2025-09-10 02:20:38.606233", + "step": 2212, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:38.638343", + "step": 2212, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004721261560916901, + "timestamp": "2025-09-10 02:20:38.643046", + "step": 2213, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:38.673567", + "step": 2213, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0031937905587255955, + "timestamp": "2025-09-10 02:20:38.680945", + "step": 2214, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:38.712648", + "step": 2214, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027609581127762794, + "timestamp": "2025-09-10 02:20:38.723411", + "step": 2215, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:38.754538", + "step": 2215, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.030603447929024696, + "timestamp": "2025-09-10 02:20:38.783175", + "step": 2216, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:38.814814", + "step": 2216, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.037538252770900726, + "timestamp": "2025-09-10 02:20:38.820166", + "step": 2217, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:38.851234", + "step": 2217, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01803760603070259, + "timestamp": "2025-09-10 02:20:38.855760", + "step": 2218, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:38.886704", + "step": 2218, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0046121361665427685, + "timestamp": "2025-09-10 02:20:38.893551", + "step": 2219, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:38.924971", + "step": 2219, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011201003566384315, + "timestamp": "2025-09-10 02:20:38.957648", + "step": 2220, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:38.989597", + "step": 2220, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02293272875249386, + "timestamp": "2025-09-10 02:20:38.994574", + "step": 2221, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:39.025660", + "step": 2221, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010559617541730404, + "timestamp": "2025-09-10 02:20:39.033153", + "step": 2222, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:39.064015", + "step": 2222, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0019281571730971336, + "timestamp": "2025-09-10 02:20:39.070877", + "step": 2223, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:39.102032", + "step": 2223, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0065590995363891125, + "timestamp": "2025-09-10 02:20:39.134526", + "step": 2224, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:39.166870", + "step": 2224, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038079393561929464, + "timestamp": "2025-09-10 02:20:39.174362", + "step": 2225, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:39.207567", + "step": 2225, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015453227795660496, + "timestamp": "2025-09-10 02:20:39.219510", + "step": 2226, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:39.250961", + "step": 2226, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004905925132334232, + "timestamp": "2025-09-10 02:20:39.255352", + "step": 2227, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:39.286245", + "step": 2227, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001167826121672988, + "timestamp": "2025-09-10 02:20:39.314110", + "step": 2228, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:20:39.344406", + "step": 2228, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013551952317357063, + "timestamp": "2025-09-10 02:20:39.346565", + "step": 2229, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:39.377987", + "step": 2229, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005024695303291082, + "timestamp": "2025-09-10 02:20:39.384822", + "step": 2230, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:20:39.427690", + "step": 2230, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.025890955701470375, + "timestamp": "2025-09-10 02:20:39.445135", + "step": 2231, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:39.479800", + "step": 2231, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005515389610081911, + "timestamp": "2025-09-10 02:20:39.511039", + "step": 2232, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:39.542422", + "step": 2232, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.015928935259580612, + "timestamp": "2025-09-10 02:20:39.547540", + "step": 2233, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:39.580999", + "step": 2233, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0011972986394539475, + "timestamp": "2025-09-10 02:20:39.588232", + "step": 2234, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:39.622143", + "step": 2234, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00578897912055254, + "timestamp": "2025-09-10 02:20:39.625930", + "step": 2235, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:20:39.664400", + "step": 2235, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0018228079425171018, + "timestamp": "2025-09-10 02:20:39.701132", + "step": 2236, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:39.731827", + "step": 2236, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0009967860532924533, + "timestamp": "2025-09-10 02:20:39.734081", + "step": 2237, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:39.764834", + "step": 2237, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027059337124228477, + "timestamp": "2025-09-10 02:20:39.769194", + "step": 2238, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:39.800765", + "step": 2238, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02666650339961052, + "timestamp": "2025-09-10 02:20:39.808311", + "step": 2239, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:39.840159", + "step": 2239, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.024572152644395828, + "timestamp": "2025-09-10 02:20:39.871790", + "step": 2240, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:39.903716", + "step": 2240, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006256606429815292, + "timestamp": "2025-09-10 02:20:39.908708", + "step": 2241, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:39.943990", + "step": 2241, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009752501733601093, + "timestamp": "2025-09-10 02:20:39.957370", + "step": 2242, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:20:39.988230", + "step": 2242, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0497528612613678, + "timestamp": "2025-09-10 02:20:39.990533", + "step": 2243, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:40.022012", + "step": 2243, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010288014076650143, + "timestamp": "2025-09-10 02:20:40.049924", + "step": 2244, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:40.082626", + "step": 2244, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004436141811311245, + "timestamp": "2025-09-10 02:20:40.087296", + "step": 2245, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 14712978242368 + }, + "timestamp": "2025-09-10 02:20:40.129627", + "step": 2245, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016879761591553688, + "timestamp": "2025-09-10 02:20:40.147151", + "step": 2246, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:40.179232", + "step": 2246, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005393616855144501, + "timestamp": "2025-09-10 02:20:40.189267", + "step": 2247, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:40.220928", + "step": 2247, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006263823714107275, + "timestamp": "2025-09-10 02:20:40.251889", + "step": 2248, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:40.284453", + "step": 2248, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0019508127588778734, + "timestamp": "2025-09-10 02:20:40.288974", + "step": 2249, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:20:40.321075", + "step": 2249, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.016456058248877525, + "timestamp": "2025-09-10 02:20:40.323232", + "step": 2250, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:40.354189", + "step": 2250, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0015526397619396448, + "timestamp": "2025-09-10 02:20:40.361701", + "step": 2251, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:20:40.401026", + "step": 2251, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002204819582402706, + "timestamp": "2025-09-10 02:20:40.437770", + "step": 2252, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:40.469534", + "step": 2252, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038875446189194918, + "timestamp": "2025-09-10 02:20:40.477752", + "step": 2253, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:40.509005", + "step": 2253, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003834874602034688, + "timestamp": "2025-09-10 02:20:40.515829", + "step": 2254, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:40.546182", + "step": 2254, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007351601030677557, + "timestamp": "2025-09-10 02:20:40.553690", + "step": 2255, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:40.585632", + "step": 2255, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018372252583503723, + "timestamp": "2025-09-10 02:20:40.614197", + "step": 2256, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:20:40.647153", + "step": 2256, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.013634584844112396, + "timestamp": "2025-09-10 02:20:40.660245", + "step": 2257, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:40.691001", + "step": 2257, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008646705187857151, + "timestamp": "2025-09-10 02:20:40.697964", + "step": 2258, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:40.729949", + "step": 2258, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008055547252297401, + "timestamp": "2025-09-10 02:20:40.737425", + "step": 2259, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:40.769504", + "step": 2259, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019878502935171127, + "timestamp": "2025-09-10 02:20:40.796993", + "step": 2260, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:40.830990", + "step": 2260, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007829791866242886, + "timestamp": "2025-09-10 02:20:40.837243", + "step": 2261, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:40.868790", + "step": 2261, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01760595664381981, + "timestamp": "2025-09-10 02:20:40.872455", + "step": 2262, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:20:40.910484", + "step": 2262, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0019513925071805716, + "timestamp": "2025-09-10 02:20:40.926123", + "step": 2263, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:40.957429", + "step": 2263, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0019971609581261873, + "timestamp": "2025-09-10 02:20:40.985426", + "step": 2264, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:41.016592", + "step": 2264, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006590723525732756, + "timestamp": "2025-09-10 02:20:41.024702", + "step": 2265, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:41.058410", + "step": 2265, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0263630710542202, + "timestamp": "2025-09-10 02:20:41.071772", + "step": 2266, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:41.103787", + "step": 2266, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.003090274054557085, + "timestamp": "2025-09-10 02:20:41.115712", + "step": 2267, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:41.146821", + "step": 2267, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01568448916077614, + "timestamp": "2025-09-10 02:20:41.174954", + "step": 2268, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:41.206104", + "step": 2268, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0363653302192688, + "timestamp": "2025-09-10 02:20:41.215843", + "step": 2269, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:41.247801", + "step": 2269, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014266527257859707, + "timestamp": "2025-09-10 02:20:41.254956", + "step": 2270, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:41.288532", + "step": 2270, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0015003466978669167, + "timestamp": "2025-09-10 02:20:41.301902", + "step": 2271, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:41.333348", + "step": 2271, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0023136555682867765, + "timestamp": "2025-09-10 02:20:41.366712", + "step": 2272, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:41.400192", + "step": 2272, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.021259073168039322, + "timestamp": "2025-09-10 02:20:41.404442", + "step": 2273, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:20:41.436347", + "step": 2273, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0009670493309386075, + "timestamp": "2025-09-10 02:20:41.438630", + "step": 2274, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:41.469990", + "step": 2274, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02005203254520893, + "timestamp": "2025-09-10 02:20:41.472485", + "step": 2275, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:41.504586", + "step": 2275, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0043064602650702, + "timestamp": "2025-09-10 02:20:41.532548", + "step": 2276, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:41.563391", + "step": 2276, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01462549064308405, + "timestamp": "2025-09-10 02:20:41.568013", + "step": 2277, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:41.599710", + "step": 2277, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01992652751505375, + "timestamp": "2025-09-10 02:20:41.606950", + "step": 2278, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:41.638093", + "step": 2278, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0108196334913373, + "timestamp": "2025-09-10 02:20:41.645374", + "step": 2279, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:41.676660", + "step": 2279, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001470404677093029, + "timestamp": "2025-09-10 02:20:41.707742", + "step": 2280, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:41.737903", + "step": 2280, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004251373466104269, + "timestamp": "2025-09-10 02:20:41.742522", + "step": 2281, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:20:41.777090", + "step": 2281, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009216892533004284, + "timestamp": "2025-09-10 02:20:41.791186", + "step": 2282, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:20:41.826307", + "step": 2282, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020200418308377266, + "timestamp": "2025-09-10 02:20:41.840250", + "step": 2283, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:41.870676", + "step": 2283, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0011728627141565084, + "timestamp": "2025-09-10 02:20:41.895569", + "step": 2284, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:20:41.927789", + "step": 2284, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.004072748590260744, + "timestamp": "2025-09-10 02:20:41.940922", + "step": 2285, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:41.971763", + "step": 2285, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03455633297562599, + "timestamp": "2025-09-10 02:20:41.978289", + "step": 2286, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:42.012708", + "step": 2286, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01204759068787098, + "timestamp": "2025-09-10 02:20:42.020129", + "step": 2287, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:42.051484", + "step": 2287, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0008365390822291374, + "timestamp": "2025-09-10 02:20:42.076075", + "step": 2288, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:42.108564", + "step": 2288, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04001276567578316, + "timestamp": "2025-09-10 02:20:42.112886", + "step": 2289, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:42.143380", + "step": 2289, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007685269229114056, + "timestamp": "2025-09-10 02:20:42.147494", + "step": 2290, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:42.178433", + "step": 2290, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0020888035651296377, + "timestamp": "2025-09-10 02:20:42.191101", + "step": 2291, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:42.222633", + "step": 2291, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0040938640013337135, + "timestamp": "2025-09-10 02:20:42.250470", + "step": 2292, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:42.281635", + "step": 2292, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.001731925061903894, + "timestamp": "2025-09-10 02:20:42.286685", + "step": 2293, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:42.317261", + "step": 2293, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.020662318915128708, + "timestamp": "2025-09-10 02:20:42.327342", + "step": 2294, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:42.358388", + "step": 2294, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0022532641887664795, + "timestamp": "2025-09-10 02:20:42.368483", + "step": 2295, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:42.398792", + "step": 2295, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0038333218544721603, + "timestamp": "2025-09-10 02:20:42.424245", + "step": 2296, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:20:42.455186", + "step": 2296, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014495732262730598, + "timestamp": "2025-09-10 02:20:42.457421", + "step": 2297, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:42.488427", + "step": 2297, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017322422936558723, + "timestamp": "2025-09-10 02:20:42.495776", + "step": 2298, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:42.526511", + "step": 2298, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0031519909389317036, + "timestamp": "2025-09-10 02:20:42.530996", + "step": 2299, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:42.562167", + "step": 2299, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0060340953059494495, + "timestamp": "2025-09-10 02:20:42.593565", + "step": 2300, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:42.624703", + "step": 2300, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0017768697580322623, + "timestamp": "2025-09-10 02:20:42.626635", + "step": 2301, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:42.657184", + "step": 2301, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002710967091843486, + "timestamp": "2025-09-10 02:20:42.661251", + "step": 2302, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:42.692441", + "step": 2302, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002997096860781312, + "timestamp": "2025-09-10 02:20:42.703313", + "step": 2303, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:42.737922", + "step": 2303, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0036649901885539293, + "timestamp": "2025-09-10 02:20:42.772214", + "step": 2304, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:42.802806", + "step": 2304, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01135755330324173, + "timestamp": "2025-09-10 02:20:42.807834", + "step": 2305, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:20:42.846726", + "step": 2305, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005223255138844252, + "timestamp": "2025-09-10 02:20:42.862333", + "step": 2306, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:42.894696", + "step": 2306, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007239340338855982, + "timestamp": "2025-09-10 02:20:42.902033", + "step": 2307, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:20:42.944257", + "step": 2307, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.017995523288846016, + "timestamp": "2025-09-10 02:20:42.982439", + "step": 2308, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:43.013889", + "step": 2308, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008536996319890022, + "timestamp": "2025-09-10 02:20:43.018208", + "step": 2309, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:43.048443", + "step": 2309, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.028488274663686752, + "timestamp": "2025-09-10 02:20:43.052782", + "step": 2310, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:43.083593", + "step": 2310, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.04561712220311165, + "timestamp": "2025-09-10 02:20:43.088056", + "step": 2311, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:43.119831", + "step": 2311, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012657607905566692, + "timestamp": "2025-09-10 02:20:43.147693", + "step": 2312, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:43.177914", + "step": 2312, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.010057737119495869, + "timestamp": "2025-09-10 02:20:43.182495", + "step": 2313, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:43.215199", + "step": 2313, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019854340702295303, + "timestamp": "2025-09-10 02:20:43.226193", + "step": 2314, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:43.259025", + "step": 2314, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014225935563445091, + "timestamp": "2025-09-10 02:20:43.269294", + "step": 2315, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:43.303944", + "step": 2315, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0027752441819757223, + "timestamp": "2025-09-10 02:20:43.331839", + "step": 2316, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:20:43.362643", + "step": 2316, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006470884662121534, + "timestamp": "2025-09-10 02:20:43.365156", + "step": 2317, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:43.396458", + "step": 2317, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00640989001840353, + "timestamp": "2025-09-10 02:20:43.400569", + "step": 2318, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:43.432483", + "step": 2318, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.01300732046365738, + "timestamp": "2025-09-10 02:20:43.445053", + "step": 2319, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:43.475866", + "step": 2319, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005316443741321564, + "timestamp": "2025-09-10 02:20:43.499172", + "step": 2320, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:43.532720", + "step": 2320, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.014593763276934624, + "timestamp": "2025-09-10 02:20:43.536478", + "step": 2321, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:20:43.570893", + "step": 2321, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.02588742971420288, + "timestamp": "2025-09-10 02:20:43.584597", + "step": 2322, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:20:43.620409", + "step": 2322, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002280434826388955, + "timestamp": "2025-09-10 02:20:43.634093", + "step": 2323, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:43.665408", + "step": 2323, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011547980830073357, + "timestamp": "2025-09-10 02:20:43.692821", + "step": 2324, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:43.725615", + "step": 2324, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019922899082303047, + "timestamp": "2025-09-10 02:20:43.733524", + "step": 2325, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:43.764663", + "step": 2325, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0015016960678622127, + "timestamp": "2025-09-10 02:20:43.774636", + "step": 2326, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:20:43.809550", + "step": 2326, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.019193019717931747, + "timestamp": "2025-09-10 02:20:43.823553", + "step": 2327, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:20:43.864804", + "step": 2327, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009289233013987541, + "timestamp": "2025-09-10 02:20:43.902812", + "step": 2328, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:43.937387", + "step": 2328, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0029966922011226416, + "timestamp": "2025-09-10 02:20:43.939136", + "step": 2329, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:43.969998", + "step": 2329, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.011959855444729328, + "timestamp": "2025-09-10 02:20:43.974267", + "step": 2330, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:44.008145", + "step": 2330, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0025715772062540054, + "timestamp": "2025-09-10 02:20:44.021546", + "step": 2331, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:44.053744", + "step": 2331, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007740771863609552, + "timestamp": "2025-09-10 02:20:44.083824", + "step": 2332, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:44.116498", + "step": 2332, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.008812974207103252, + "timestamp": "2025-09-10 02:20:44.120577", + "step": 2333, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:44.153115", + "step": 2333, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009404649026691914, + "timestamp": "2025-09-10 02:20:44.164292", + "step": 2334, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:44.195361", + "step": 2334, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002871322212740779, + "timestamp": "2025-09-10 02:20:44.199476", + "step": 2335, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:44.231014", + "step": 2335, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00894598476588726, + "timestamp": "2025-09-10 02:20:44.259219", + "step": 2336, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:44.290134", + "step": 2336, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0016351427184417844, + "timestamp": "2025-09-10 02:20:44.294752", + "step": 2337, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:20:44.336635", + "step": 2337, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.03164186701178551, + "timestamp": "2025-09-10 02:20:44.353688", + "step": 2338, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:44.385870", + "step": 2338, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.002591783879324794, + "timestamp": "2025-09-10 02:20:44.392350", + "step": 2339, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:44.423985", + "step": 2339, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.006208306644111872, + "timestamp": "2025-09-10 02:20:44.452230", + "step": 2340, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:44.483661", + "step": 2340, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.034006860107183456, + "timestamp": "2025-09-10 02:20:44.496386", + "step": 2341, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 13289167064320 + }, + "timestamp": "2025-09-10 02:20:44.536358", + "step": 2341, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.005256416741758585, + "timestamp": "2025-09-10 02:20:44.552804", + "step": 2342, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:44.584822", + "step": 2342, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.009249741211533546, + "timestamp": "2025-09-10 02:20:44.591960", + "step": 2343, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:44.622800", + "step": 2343, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007146596908569336, + "timestamp": "2025-09-10 02:20:44.651295", + "step": 2344, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:44.682659", + "step": 2344, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0049681952223181725, + "timestamp": "2025-09-10 02:20:44.688005", + "step": 2345, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:44.718988", + "step": 2345, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.018689943477511406, + "timestamp": "2025-09-10 02:20:44.726369", + "step": 2346, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:44.757158", + "step": 2346, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.022296303883194923, + "timestamp": "2025-09-10 02:20:44.764741", + "step": 2347, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:44.796511", + "step": 2347, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0007350871455855668, + "timestamp": "2025-09-10 02:20:44.828415", + "step": 2348, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:44.859942", + "step": 2348, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0011058534728363156, + "timestamp": "2025-09-10 02:20:44.862115", + "step": 2349, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:20:44.896598", + "step": 2349, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.027341008186340332, + "timestamp": "2025-09-10 02:20:44.910312", + "step": 2350, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:44.942224", + "step": 2350, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.007282934617251158, + "timestamp": "2025-09-10 02:20:44.949678", + "step": 2351, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 19459015502528 + }, + "timestamp": "2025-09-10 02:20:45.004125", + "step": 2351, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.00618086289614439, + "timestamp": "2025-09-10 02:20:45.048452", + "step": 2352, + "epoch": 1 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:20:55.262898", + "step": 2352, + "epoch": 1 + }, + { + "type": "pplx", + "content": 18824440.007556766, + "timestamp": "2025-09-10 02:20:55.266064", + "step": 2352, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:55.295642", + "step": 2352, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.012922325171530247, + "timestamp": "2025-09-10 02:20:55.302810", + "step": 2353, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:55.333710", + "step": 2353, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0021893230732530355, + "timestamp": "2025-09-10 02:20:55.341004", + "step": 2354, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:55.371723", + "step": 2354, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0217142216861248, + "timestamp": "2025-09-10 02:20:55.375719", + "step": 2355, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 15662185694400 + }, + "timestamp": "2025-09-10 02:20:55.422689", + "step": 2355, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0012116653379052877, + "timestamp": "2025-09-10 02:20:55.462794", + "step": 2356, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 3, + 224 + ], + "flops": 4983601869792 + }, + "timestamp": "2025-09-10 02:20:55.515007", + "step": 2356, + "epoch": 1 + }, + { + "type": "loss", + "content": 0.0030051611829549074, + "timestamp": "2025-09-10 02:20:55.517186", + "step": 2357, + "epoch": 1 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:55.565649", + "step": 2357, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001868675695732236, + "timestamp": "2025-09-10 02:20:55.570508", + "step": 2358, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:55.601669", + "step": 2358, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006056786980479956, + "timestamp": "2025-09-10 02:20:55.609223", + "step": 2359, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:55.640144", + "step": 2359, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01372221577912569, + "timestamp": "2025-09-10 02:20:55.668003", + "step": 2360, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:55.698857", + "step": 2360, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012454865500330925, + "timestamp": "2025-09-10 02:20:55.703516", + "step": 2361, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:55.734399", + "step": 2361, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016425231005996466, + "timestamp": "2025-09-10 02:20:55.741488", + "step": 2362, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:55.771791", + "step": 2362, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011411800980567932, + "timestamp": "2025-09-10 02:20:55.775839", + "step": 2363, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:55.807165", + "step": 2363, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.035575442016124725, + "timestamp": "2025-09-10 02:20:55.835049", + "step": 2364, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:55.865716", + "step": 2364, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028888003900647163, + "timestamp": "2025-09-10 02:20:55.870861", + "step": 2365, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:55.902237", + "step": 2365, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013811533339321613, + "timestamp": "2025-09-10 02:20:55.912491", + "step": 2366, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:55.943287", + "step": 2366, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001658704481087625, + "timestamp": "2025-09-10 02:20:55.951124", + "step": 2367, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:55.982180", + "step": 2367, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007761240005493164, + "timestamp": "2025-09-10 02:20:56.009977", + "step": 2368, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:56.041483", + "step": 2368, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019760314375162125, + "timestamp": "2025-09-10 02:20:56.045864", + "step": 2369, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:56.076662", + "step": 2369, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020639884751290083, + "timestamp": "2025-09-10 02:20:56.081232", + "step": 2370, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:56.114724", + "step": 2370, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026808734983205795, + "timestamp": "2025-09-10 02:20:56.128079", + "step": 2371, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:56.159084", + "step": 2371, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017675552517175674, + "timestamp": "2025-09-10 02:20:56.184457", + "step": 2372, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:56.215102", + "step": 2372, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011209690710529685, + "timestamp": "2025-09-10 02:20:56.217329", + "step": 2373, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:56.248801", + "step": 2373, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01740814931690693, + "timestamp": "2025-09-10 02:20:56.261180", + "step": 2374, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:56.293507", + "step": 2374, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014776641502976418, + "timestamp": "2025-09-10 02:20:56.298010", + "step": 2375, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:56.329056", + "step": 2375, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006744783371686935, + "timestamp": "2025-09-10 02:20:56.360277", + "step": 2376, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:56.391300", + "step": 2376, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004561256151646376, + "timestamp": "2025-09-10 02:20:56.393677", + "step": 2377, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:56.425799", + "step": 2377, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02400498278439045, + "timestamp": "2025-09-10 02:20:56.435986", + "step": 2378, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:56.467356", + "step": 2378, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026017860509455204, + "timestamp": "2025-09-10 02:20:56.471485", + "step": 2379, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:56.502242", + "step": 2379, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014477769145742059, + "timestamp": "2025-09-10 02:20:56.530026", + "step": 2380, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:56.562359", + "step": 2380, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038361712358891964, + "timestamp": "2025-09-10 02:20:56.567242", + "step": 2381, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:56.599463", + "step": 2381, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015629353001713753, + "timestamp": "2025-09-10 02:20:56.606433", + "step": 2382, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:56.639786", + "step": 2382, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013419255847111344, + "timestamp": "2025-09-10 02:20:56.647012", + "step": 2383, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:56.677425", + "step": 2383, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012586474418640137, + "timestamp": "2025-09-10 02:20:56.705245", + "step": 2384, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:56.736712", + "step": 2384, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011415432207286358, + "timestamp": "2025-09-10 02:20:56.739923", + "step": 2385, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:56.773942", + "step": 2385, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03539576753973961, + "timestamp": "2025-09-10 02:20:56.781014", + "step": 2386, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:20:56.815401", + "step": 2386, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012404642766341567, + "timestamp": "2025-09-10 02:20:56.829064", + "step": 2387, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:56.860093", + "step": 2387, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017915985081344843, + "timestamp": "2025-09-10 02:20:56.887618", + "step": 2388, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:56.919137", + "step": 2388, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01956382766366005, + "timestamp": "2025-09-10 02:20:56.923666", + "step": 2389, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:56.955340", + "step": 2389, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012075236067175865, + "timestamp": "2025-09-10 02:20:56.959135", + "step": 2390, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:56.990995", + "step": 2390, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015462463721632957, + "timestamp": "2025-09-10 02:20:56.998684", + "step": 2391, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:57.029491", + "step": 2391, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03371422737836838, + "timestamp": "2025-09-10 02:20:57.060357", + "step": 2392, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:57.092148", + "step": 2392, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00984213501214981, + "timestamp": "2025-09-10 02:20:57.094323", + "step": 2393, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:57.137137", + "step": 2393, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032658951822668314, + "timestamp": "2025-09-10 02:20:57.143803", + "step": 2394, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:20:57.176805", + "step": 2394, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00632342416793108, + "timestamp": "2025-09-10 02:20:57.180620", + "step": 2395, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:57.211466", + "step": 2395, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027958959341049194, + "timestamp": "2025-09-10 02:20:57.236629", + "step": 2396, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:57.268322", + "step": 2396, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007394559681415558, + "timestamp": "2025-09-10 02:20:57.272723", + "step": 2397, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:57.305596", + "step": 2397, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04190784692764282, + "timestamp": "2025-09-10 02:20:57.312749", + "step": 2398, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:57.344112", + "step": 2398, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006203221622854471, + "timestamp": "2025-09-10 02:20:57.348174", + "step": 2399, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:57.381966", + "step": 2399, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003813191084191203, + "timestamp": "2025-09-10 02:20:57.416214", + "step": 2400, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:57.449093", + "step": 2400, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005948929465375841, + "timestamp": "2025-09-10 02:20:57.453635", + "step": 2401, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:57.484630", + "step": 2401, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04219771549105644, + "timestamp": "2025-09-10 02:20:57.488654", + "step": 2402, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:57.519922", + "step": 2402, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006709757260978222, + "timestamp": "2025-09-10 02:20:57.526953", + "step": 2403, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:57.557872", + "step": 2403, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0066467165015637875, + "timestamp": "2025-09-10 02:20:57.588893", + "step": 2404, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:57.620246", + "step": 2404, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014069050084799528, + "timestamp": "2025-09-10 02:20:57.624577", + "step": 2405, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:57.656236", + "step": 2405, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022929934784770012, + "timestamp": "2025-09-10 02:20:57.666779", + "step": 2406, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:57.698496", + "step": 2406, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03234897926449776, + "timestamp": "2025-09-10 02:20:57.709451", + "step": 2407, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:20:57.742453", + "step": 2407, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0065283398143947124, + "timestamp": "2025-09-10 02:20:57.766252", + "step": 2408, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 928 + ], + "flops": 27527278844800 + }, + "timestamp": "2025-09-10 02:20:57.838691", + "step": 2408, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014821560122072697, + "timestamp": "2025-09-10 02:20:57.870500", + "step": 2409, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:57.904843", + "step": 2409, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003778102109208703, + "timestamp": "2025-09-10 02:20:57.909324", + "step": 2410, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:57.943833", + "step": 2410, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012805613689124584, + "timestamp": "2025-09-10 02:20:57.954035", + "step": 2411, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:57.985528", + "step": 2411, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007993345148861408, + "timestamp": "2025-09-10 02:20:58.013838", + "step": 2412, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:20:58.050742", + "step": 2412, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006639838218688965, + "timestamp": "2025-09-10 02:20:58.065968", + "step": 2413, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:20:58.096623", + "step": 2413, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00533502921462059, + "timestamp": "2025-09-10 02:20:58.099109", + "step": 2414, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:58.129457", + "step": 2414, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02578054927289486, + "timestamp": "2025-09-10 02:20:58.136534", + "step": 2415, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:58.167819", + "step": 2415, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007064398843795061, + "timestamp": "2025-09-10 02:20:58.198746", + "step": 2416, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:58.228972", + "step": 2416, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01666208915412426, + "timestamp": "2025-09-10 02:20:58.234269", + "step": 2417, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:58.266396", + "step": 2417, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.036728501319885254, + "timestamp": "2025-09-10 02:20:58.278539", + "step": 2418, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:58.314188", + "step": 2418, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011314035393297672, + "timestamp": "2025-09-10 02:20:58.320791", + "step": 2419, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:58.352422", + "step": 2419, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004533576779067516, + "timestamp": "2025-09-10 02:20:58.380797", + "step": 2420, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:58.412606", + "step": 2420, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005247695837169886, + "timestamp": "2025-09-10 02:20:58.417012", + "step": 2421, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:20:58.456744", + "step": 2421, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019029613584280014, + "timestamp": "2025-09-10 02:20:58.472721", + "step": 2422, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:58.504313", + "step": 2422, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012553774751722813, + "timestamp": "2025-09-10 02:20:58.511447", + "step": 2423, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:58.541783", + "step": 2423, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005390453618019819, + "timestamp": "2025-09-10 02:20:58.566917", + "step": 2424, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:58.602231", + "step": 2424, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02095671556890011, + "timestamp": "2025-09-10 02:20:58.604478", + "step": 2425, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:20:58.634822", + "step": 2425, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011055312119424343, + "timestamp": "2025-09-10 02:20:58.642553", + "step": 2426, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:20:58.682729", + "step": 2426, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010104840621352196, + "timestamp": "2025-09-10 02:20:58.698595", + "step": 2427, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:58.730437", + "step": 2427, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00237080454826355, + "timestamp": "2025-09-10 02:20:58.758616", + "step": 2428, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:20:58.790689", + "step": 2428, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009992515668272972, + "timestamp": "2025-09-10 02:20:58.800696", + "step": 2429, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:58.833800", + "step": 2429, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029752474278211594, + "timestamp": "2025-09-10 02:20:58.840812", + "step": 2430, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:58.875836", + "step": 2430, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017465923447161913, + "timestamp": "2025-09-10 02:20:58.882412", + "step": 2431, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:20:58.913422", + "step": 2431, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02968502603471279, + "timestamp": "2025-09-10 02:20:58.945044", + "step": 2432, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:20:58.982449", + "step": 2432, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007202841341495514, + "timestamp": "2025-09-10 02:20:58.997633", + "step": 2433, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:59.036354", + "step": 2433, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028496667742729187, + "timestamp": "2025-09-10 02:20:59.043146", + "step": 2434, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:20:59.078570", + "step": 2434, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010356120765209198, + "timestamp": "2025-09-10 02:20:59.082482", + "step": 2435, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:59.118399", + "step": 2435, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010988089255988598, + "timestamp": "2025-09-10 02:20:59.152646", + "step": 2436, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:20:59.189861", + "step": 2436, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013273806311190128, + "timestamp": "2025-09-10 02:20:59.204988", + "step": 2437, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:59.240005", + "step": 2437, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0162824559956789, + "timestamp": "2025-09-10 02:20:59.251543", + "step": 2438, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:20:59.291319", + "step": 2438, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015538596548140049, + "timestamp": "2025-09-10 02:20:59.302867", + "step": 2439, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:20:59.336710", + "step": 2439, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024137185886502266, + "timestamp": "2025-09-10 02:20:59.367489", + "step": 2440, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:59.399054", + "step": 2440, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012249810621142387, + "timestamp": "2025-09-10 02:20:59.403560", + "step": 2441, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:59.436669", + "step": 2441, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014839425683021545, + "timestamp": "2025-09-10 02:20:59.443398", + "step": 2442, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:20:59.481062", + "step": 2442, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024386601522564888, + "timestamp": "2025-09-10 02:20:59.494734", + "step": 2443, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:59.526751", + "step": 2443, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007399399299174547, + "timestamp": "2025-09-10 02:20:59.551681", + "step": 2444, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:20:59.584018", + "step": 2444, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0049104467034339905, + "timestamp": "2025-09-10 02:20:59.588711", + "step": 2445, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 14712978242368 + }, + "timestamp": "2025-09-10 02:20:59.632271", + "step": 2445, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0064061665907502174, + "timestamp": "2025-09-10 02:20:59.649910", + "step": 2446, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:59.683185", + "step": 2446, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026691416278481483, + "timestamp": "2025-09-10 02:20:59.689889", + "step": 2447, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:59.724090", + "step": 2447, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004400145262479782, + "timestamp": "2025-09-10 02:20:59.751749", + "step": 2448, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:20:59.786408", + "step": 2448, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002782547613605857, + "timestamp": "2025-09-10 02:20:59.798614", + "step": 2449, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:20:59.830361", + "step": 2449, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030095087364315987, + "timestamp": "2025-09-10 02:20:59.834411", + "step": 2450, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:20:59.867471", + "step": 2450, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004379054065793753, + "timestamp": "2025-09-10 02:20:59.874120", + "step": 2451, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:20:59.916375", + "step": 2451, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009237154386937618, + "timestamp": "2025-09-10 02:20:59.954598", + "step": 2452, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:20:59.986634", + "step": 2452, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013287164270877838, + "timestamp": "2025-09-10 02:20:59.990682", + "step": 2453, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:00.022911", + "step": 2453, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038054410833865404, + "timestamp": "2025-09-10 02:21:00.030203", + "step": 2454, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:00.061572", + "step": 2454, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006211976520717144, + "timestamp": "2025-09-10 02:21:00.065972", + "step": 2455, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:00.098272", + "step": 2455, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011952430941164494, + "timestamp": "2025-09-10 02:21:00.123510", + "step": 2456, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:00.158649", + "step": 2456, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012498662807047367, + "timestamp": "2025-09-10 02:21:00.163062", + "step": 2457, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:21:00.196415", + "step": 2457, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0178080927580595, + "timestamp": "2025-09-10 02:21:00.198975", + "step": 2458, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:00.230365", + "step": 2458, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017546426504850388, + "timestamp": "2025-09-10 02:21:00.237228", + "step": 2459, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:00.268659", + "step": 2459, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023515610024333, + "timestamp": "2025-09-10 02:21:00.297119", + "step": 2460, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:00.329743", + "step": 2460, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0053445808589458466, + "timestamp": "2025-09-10 02:21:00.338117", + "step": 2461, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:00.371739", + "step": 2461, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002676423406228423, + "timestamp": "2025-09-10 02:21:00.382019", + "step": 2462, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:00.414165", + "step": 2462, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004749422427266836, + "timestamp": "2025-09-10 02:21:00.421359", + "step": 2463, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:00.453478", + "step": 2463, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025678569450974464, + "timestamp": "2025-09-10 02:21:00.480924", + "step": 2464, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:00.513252", + "step": 2464, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027631347998976707, + "timestamp": "2025-09-10 02:21:00.518313", + "step": 2465, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:00.551116", + "step": 2465, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007423713803291321, + "timestamp": "2025-09-10 02:21:00.560967", + "step": 2466, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:00.592568", + "step": 2466, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034356131218373775, + "timestamp": "2025-09-10 02:21:00.599184", + "step": 2467, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:00.631300", + "step": 2467, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028629249427467585, + "timestamp": "2025-09-10 02:21:00.664453", + "step": 2468, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:00.697219", + "step": 2468, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003994452767074108, + "timestamp": "2025-09-10 02:21:00.702035", + "step": 2469, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:00.733794", + "step": 2469, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007717865519225597, + "timestamp": "2025-09-10 02:21:00.740914", + "step": 2470, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:00.773340", + "step": 2470, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008052381686866283, + "timestamp": "2025-09-10 02:21:00.779842", + "step": 2471, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:00.811917", + "step": 2471, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023543791845440865, + "timestamp": "2025-09-10 02:21:00.839978", + "step": 2472, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:00.872023", + "step": 2472, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004500131588429213, + "timestamp": "2025-09-10 02:21:00.876183", + "step": 2473, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:21:00.910614", + "step": 2473, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01701648160815239, + "timestamp": "2025-09-10 02:21:00.923993", + "step": 2474, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:00.963743", + "step": 2474, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04145175218582153, + "timestamp": "2025-09-10 02:21:00.970404", + "step": 2475, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:01.003069", + "step": 2475, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027746308594942093, + "timestamp": "2025-09-10 02:21:01.036297", + "step": 2476, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:21:01.069237", + "step": 2476, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0057420432567596436, + "timestamp": "2025-09-10 02:21:01.081920", + "step": 2477, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:01.114764", + "step": 2477, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024375727400183678, + "timestamp": "2025-09-10 02:21:01.126557", + "step": 2478, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:01.159373", + "step": 2478, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004905619192868471, + "timestamp": "2025-09-10 02:21:01.169020", + "step": 2479, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:01.200895", + "step": 2479, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014483463950455189, + "timestamp": "2025-09-10 02:21:01.228375", + "step": 2480, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:01.261928", + "step": 2480, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015179364010691643, + "timestamp": "2025-09-10 02:21:01.269896", + "step": 2481, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:21:01.302092", + "step": 2481, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010475664399564266, + "timestamp": "2025-09-10 02:21:01.304553", + "step": 2482, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:01.335929", + "step": 2482, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008311014622449875, + "timestamp": "2025-09-10 02:21:01.345565", + "step": 2483, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:21:01.383373", + "step": 2483, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018926413729786873, + "timestamp": "2025-09-10 02:21:01.418320", + "step": 2484, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:21:01.453949", + "step": 2484, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008039912208914757, + "timestamp": "2025-09-10 02:21:01.457516", + "step": 2485, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:01.494246", + "step": 2485, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004062869120389223, + "timestamp": "2025-09-10 02:21:01.506816", + "step": 2486, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:21:01.539890", + "step": 2486, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016554275527596474, + "timestamp": "2025-09-10 02:21:01.541912", + "step": 2487, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:01.573571", + "step": 2487, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016327768564224243, + "timestamp": "2025-09-10 02:21:01.601646", + "step": 2488, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:01.633487", + "step": 2488, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0051765465177595615, + "timestamp": "2025-09-10 02:21:01.635807", + "step": 2489, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:01.672950", + "step": 2489, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0068465410731732845, + "timestamp": "2025-09-10 02:21:01.680286", + "step": 2490, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:01.713239", + "step": 2490, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01244097389280796, + "timestamp": "2025-09-10 02:21:01.720561", + "step": 2491, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:01.758847", + "step": 2491, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0072895921766757965, + "timestamp": "2025-09-10 02:21:01.791378", + "step": 2492, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:01.824943", + "step": 2492, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010245480574667454, + "timestamp": "2025-09-10 02:21:01.829063", + "step": 2493, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:01.861585", + "step": 2493, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00806692149490118, + "timestamp": "2025-09-10 02:21:01.873213", + "step": 2494, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:01.908408", + "step": 2494, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003785413922742009, + "timestamp": "2025-09-10 02:21:01.914785", + "step": 2495, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:01.950757", + "step": 2495, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003561714431270957, + "timestamp": "2025-09-10 02:21:01.978198", + "step": 2496, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:02.013614", + "step": 2496, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025676547084003687, + "timestamp": "2025-09-10 02:21:02.018682", + "step": 2497, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:21:02.059458", + "step": 2497, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012827993370592594, + "timestamp": "2025-09-10 02:21:02.073181", + "step": 2498, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:02.112247", + "step": 2498, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010569563135504723, + "timestamp": "2025-09-10 02:21:02.119588", + "step": 2499, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:21:12.914739", + "step": 2499, + "epoch": 2 + }, + { + "type": "pplx", + "content": 18230494.697521377, + "timestamp": "2025-09-10 02:21:12.919042", + "step": 2499, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:12.952692", + "step": 2499, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007842292077839375, + "timestamp": "2025-09-10 02:21:12.979378", + "step": 2500, + "epoch": 2 + }, + { + "type": "info", + "content": "Checkpoint saved at step 2500", + "timestamp": "2025-09-10 02:21:18.083563", + "step": 2500, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:18.117148", + "step": 2500, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012375351507216692, + "timestamp": "2025-09-10 02:21:18.121456", + "step": 2501, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:18.155236", + "step": 2501, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001796129741705954, + "timestamp": "2025-09-10 02:21:18.164371", + "step": 2502, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:18.199139", + "step": 2502, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01929536834359169, + "timestamp": "2025-09-10 02:21:18.205478", + "step": 2503, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:18.240242", + "step": 2503, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012172207236289978, + "timestamp": "2025-09-10 02:21:18.270857", + "step": 2504, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:18.313891", + "step": 2504, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012182426638901234, + "timestamp": "2025-09-10 02:21:18.319073", + "step": 2505, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:18.352236", + "step": 2505, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019687467720359564, + "timestamp": "2025-09-10 02:21:18.359205", + "step": 2506, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:18.392271", + "step": 2506, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035485646221786737, + "timestamp": "2025-09-10 02:21:18.399195", + "step": 2507, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:21:18.434049", + "step": 2507, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017159203067421913, + "timestamp": "2025-09-10 02:21:18.468718", + "step": 2508, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:18.505446", + "step": 2508, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00748326163738966, + "timestamp": "2025-09-10 02:21:18.512742", + "step": 2509, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:21:18.549559", + "step": 2509, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03193259984254837, + "timestamp": "2025-09-10 02:21:18.551969", + "step": 2510, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:18.587099", + "step": 2510, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01799680106341839, + "timestamp": "2025-09-10 02:21:18.593659", + "step": 2511, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:18.628387", + "step": 2511, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0041481442749500275, + "timestamp": "2025-09-10 02:21:18.661835", + "step": 2512, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:18.693026", + "step": 2512, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011895556934177876, + "timestamp": "2025-09-10 02:21:18.695018", + "step": 2513, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:21:18.725339", + "step": 2513, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007325722835958004, + "timestamp": "2025-09-10 02:21:18.727977", + "step": 2514, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:18.758656", + "step": 2514, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011184222996234894, + "timestamp": "2025-09-10 02:21:18.770940", + "step": 2515, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:18.803371", + "step": 2515, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011311122216284275, + "timestamp": "2025-09-10 02:21:18.836820", + "step": 2516, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:18.867500", + "step": 2516, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030376592185348272, + "timestamp": "2025-09-10 02:21:18.872524", + "step": 2517, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:18.903552", + "step": 2517, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013369477353990078, + "timestamp": "2025-09-10 02:21:18.907979", + "step": 2518, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:18.938715", + "step": 2518, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013479441404342651, + "timestamp": "2025-09-10 02:21:18.943289", + "step": 2519, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:18.973618", + "step": 2519, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019774915650486946, + "timestamp": "2025-09-10 02:21:19.001111", + "step": 2520, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:19.031737", + "step": 2520, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017613651230931282, + "timestamp": "2025-09-10 02:21:19.036334", + "step": 2521, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:19.067319", + "step": 2521, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010517450049519539, + "timestamp": "2025-09-10 02:21:19.073952", + "step": 2522, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:19.105667", + "step": 2522, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010430269874632359, + "timestamp": "2025-09-10 02:21:19.115558", + "step": 2523, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:19.146127", + "step": 2523, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012062592431902885, + "timestamp": "2025-09-10 02:21:19.179159", + "step": 2524, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:21:19.212693", + "step": 2524, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021377981174737215, + "timestamp": "2025-09-10 02:21:19.225778", + "step": 2525, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:21:19.259740", + "step": 2525, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014699029736220837, + "timestamp": "2025-09-10 02:21:19.273123", + "step": 2526, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:19.303637", + "step": 2526, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00048396483180113137, + "timestamp": "2025-09-10 02:21:19.307765", + "step": 2527, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:19.338356", + "step": 2527, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010418041609227657, + "timestamp": "2025-09-10 02:21:19.363583", + "step": 2528, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:19.394714", + "step": 2528, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037663152907043695, + "timestamp": "2025-09-10 02:21:19.405172", + "step": 2529, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:21:19.439960", + "step": 2529, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006023730151355267, + "timestamp": "2025-09-10 02:21:19.453673", + "step": 2530, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:19.484141", + "step": 2530, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022649195045232773, + "timestamp": "2025-09-10 02:21:19.488584", + "step": 2531, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:19.519990", + "step": 2531, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008288032375276089, + "timestamp": "2025-09-10 02:21:19.548032", + "step": 2532, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:19.579154", + "step": 2532, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019115403294563293, + "timestamp": "2025-09-10 02:21:19.589494", + "step": 2533, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:19.622081", + "step": 2533, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005932506639510393, + "timestamp": "2025-09-10 02:21:19.628835", + "step": 2534, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 18035204324480 + }, + "timestamp": "2025-09-10 02:21:19.680355", + "step": 2534, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013247926719486713, + "timestamp": "2025-09-10 02:21:19.701849", + "step": 2535, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:19.732943", + "step": 2535, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022640167735517025, + "timestamp": "2025-09-10 02:21:19.766423", + "step": 2536, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:19.802874", + "step": 2536, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012684384128078818, + "timestamp": "2025-09-10 02:21:19.805624", + "step": 2537, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:19.837258", + "step": 2537, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007246891502290964, + "timestamp": "2025-09-10 02:21:19.841698", + "step": 2538, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:21:19.876281", + "step": 2538, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01728993095457554, + "timestamp": "2025-09-10 02:21:19.889930", + "step": 2539, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:21:19.929146", + "step": 2539, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011178716085851192, + "timestamp": "2025-09-10 02:21:19.964026", + "step": 2540, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:20.000513", + "step": 2540, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008106366731226444, + "timestamp": "2025-09-10 02:21:20.007292", + "step": 2541, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:20.043383", + "step": 2541, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02785063162446022, + "timestamp": "2025-09-10 02:21:20.050102", + "step": 2542, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:20.084769", + "step": 2542, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025354115292429924, + "timestamp": "2025-09-10 02:21:20.091793", + "step": 2543, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:20.129268", + "step": 2543, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003218255878891796, + "timestamp": "2025-09-10 02:21:20.155204", + "step": 2544, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:20.190690", + "step": 2544, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016497811302542686, + "timestamp": "2025-09-10 02:21:20.197651", + "step": 2545, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:20.231970", + "step": 2545, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013020535930991173, + "timestamp": "2025-09-10 02:21:20.236272", + "step": 2546, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:20.269435", + "step": 2546, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023302300833165646, + "timestamp": "2025-09-10 02:21:20.276990", + "step": 2547, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:20.308936", + "step": 2547, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026004468090832233, + "timestamp": "2025-09-10 02:21:20.336788", + "step": 2548, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:21:20.375176", + "step": 2548, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008805993013083935, + "timestamp": "2025-09-10 02:21:20.388388", + "step": 2549, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:20.432408", + "step": 2549, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001498592202551663, + "timestamp": "2025-09-10 02:21:20.440194", + "step": 2550, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:20.477923", + "step": 2550, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017248743679374456, + "timestamp": "2025-09-10 02:21:20.483230", + "step": 2551, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:20.528188", + "step": 2551, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006637393496930599, + "timestamp": "2025-09-10 02:21:20.553155", + "step": 2552, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:20.585361", + "step": 2552, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.045442163944244385, + "timestamp": "2025-09-10 02:21:20.590283", + "step": 2553, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:20.622339", + "step": 2553, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018050571903586388, + "timestamp": "2025-09-10 02:21:20.631958", + "step": 2554, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:20.662783", + "step": 2554, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028236510697752237, + "timestamp": "2025-09-10 02:21:20.669499", + "step": 2555, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:20.699836", + "step": 2555, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024217301979660988, + "timestamp": "2025-09-10 02:21:20.727494", + "step": 2556, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:20.759827", + "step": 2556, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008744286606088281, + "timestamp": "2025-09-10 02:21:20.768328", + "step": 2557, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:20.799748", + "step": 2557, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017181773437187076, + "timestamp": "2025-09-10 02:21:20.806556", + "step": 2558, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:20.837541", + "step": 2558, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028813164681196213, + "timestamp": "2025-09-10 02:21:20.844912", + "step": 2559, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:20.876531", + "step": 2559, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020341165363788605, + "timestamp": "2025-09-10 02:21:20.904934", + "step": 2560, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:20.937659", + "step": 2560, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013223226182162762, + "timestamp": "2025-09-10 02:21:20.943076", + "step": 2561, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:20.972726", + "step": 2561, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00022325999452732503, + "timestamp": "2025-09-10 02:21:20.980421", + "step": 2562, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:21.010388", + "step": 2562, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023189482744783163, + "timestamp": "2025-09-10 02:21:21.018006", + "step": 2563, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:21.048385", + "step": 2563, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025371316820383072, + "timestamp": "2025-09-10 02:21:21.076165", + "step": 2564, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:21.107692", + "step": 2564, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001871153013780713, + "timestamp": "2025-09-10 02:21:21.117495", + "step": 2565, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:21.151995", + "step": 2565, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02054077573120594, + "timestamp": "2025-09-10 02:21:21.159853", + "step": 2566, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:21.191020", + "step": 2566, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016704251989722252, + "timestamp": "2025-09-10 02:21:21.194881", + "step": 2567, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:21.226260", + "step": 2567, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010114437900483608, + "timestamp": "2025-09-10 02:21:21.254592", + "step": 2568, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:21.285689", + "step": 2568, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009702653624117374, + "timestamp": "2025-09-10 02:21:21.287904", + "step": 2569, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:21.317390", + "step": 2569, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002314184093847871, + "timestamp": "2025-09-10 02:21:21.322059", + "step": 2570, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:21.351928", + "step": 2570, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005179584841243923, + "timestamp": "2025-09-10 02:21:21.355937", + "step": 2571, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:21.386239", + "step": 2571, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00541424797847867, + "timestamp": "2025-09-10 02:21:21.414415", + "step": 2572, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:21.445599", + "step": 2572, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001489723101258278, + "timestamp": "2025-09-10 02:21:21.449809", + "step": 2573, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:21.481471", + "step": 2573, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004314497113227844, + "timestamp": "2025-09-10 02:21:21.485844", + "step": 2574, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:21:21.519739", + "step": 2574, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00981599185615778, + "timestamp": "2025-09-10 02:21:21.533143", + "step": 2575, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:21.563663", + "step": 2575, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004870929755270481, + "timestamp": "2025-09-10 02:21:21.591404", + "step": 2576, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:21.622364", + "step": 2576, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016966963186860085, + "timestamp": "2025-09-10 02:21:21.632658", + "step": 2577, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:21:21.667842", + "step": 2577, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00209718756377697, + "timestamp": "2025-09-10 02:21:21.681559", + "step": 2578, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:21.712192", + "step": 2578, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006372584495693445, + "timestamp": "2025-09-10 02:21:21.719248", + "step": 2579, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:21.750187", + "step": 2579, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025949012488126755, + "timestamp": "2025-09-10 02:21:21.778358", + "step": 2580, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:21.808438", + "step": 2580, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02052624709904194, + "timestamp": "2025-09-10 02:21:21.812988", + "step": 2581, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 15662185694400 + }, + "timestamp": "2025-09-10 02:21:21.858702", + "step": 2581, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00807008147239685, + "timestamp": "2025-09-10 02:21:21.877864", + "step": 2582, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:21.909123", + "step": 2582, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018544618040323257, + "timestamp": "2025-09-10 02:21:21.920247", + "step": 2583, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:21.951880", + "step": 2583, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005954326130449772, + "timestamp": "2025-09-10 02:21:21.983423", + "step": 2584, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:22.015401", + "step": 2584, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010289547964930534, + "timestamp": "2025-09-10 02:21:22.017574", + "step": 2585, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:21:22.055532", + "step": 2585, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002948526758700609, + "timestamp": "2025-09-10 02:21:22.068927", + "step": 2586, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:22.101655", + "step": 2586, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0275122057646513, + "timestamp": "2025-09-10 02:21:22.114202", + "step": 2587, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:21:22.152963", + "step": 2587, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017236763378605247, + "timestamp": "2025-09-10 02:21:22.189514", + "step": 2588, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:22.220119", + "step": 2588, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016498176380991936, + "timestamp": "2025-09-10 02:21:22.227995", + "step": 2589, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:22.259287", + "step": 2589, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016592005267739296, + "timestamp": "2025-09-10 02:21:22.271498", + "step": 2590, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:22.301955", + "step": 2590, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010614178609102964, + "timestamp": "2025-09-10 02:21:22.314305", + "step": 2591, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:22.346152", + "step": 2591, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009229181334376335, + "timestamp": "2025-09-10 02:21:22.374266", + "step": 2592, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:22.404849", + "step": 2592, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001209449372254312, + "timestamp": "2025-09-10 02:21:22.409505", + "step": 2593, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:22.441174", + "step": 2593, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010440163314342499, + "timestamp": "2025-09-10 02:21:22.448846", + "step": 2594, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:22.480478", + "step": 2594, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017658992437645793, + "timestamp": "2025-09-10 02:21:22.487049", + "step": 2595, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:22.517815", + "step": 2595, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006559863686561584, + "timestamp": "2025-09-10 02:21:22.546183", + "step": 2596, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:21:22.581657", + "step": 2596, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015221442095935345, + "timestamp": "2025-09-10 02:21:22.596787", + "step": 2597, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:22.628337", + "step": 2597, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013818376464769244, + "timestamp": "2025-09-10 02:21:22.636111", + "step": 2598, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:22.675626", + "step": 2598, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0057688066735863686, + "timestamp": "2025-09-10 02:21:22.682673", + "step": 2599, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:22.713742", + "step": 2599, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0062509505078196526, + "timestamp": "2025-09-10 02:21:22.738675", + "step": 2600, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:21:22.772019", + "step": 2600, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01174467708915472, + "timestamp": "2025-09-10 02:21:22.774363", + "step": 2601, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:22.806500", + "step": 2601, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026152905076742172, + "timestamp": "2025-09-10 02:21:22.814030", + "step": 2602, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:22.845107", + "step": 2602, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017859925283119082, + "timestamp": "2025-09-10 02:21:22.852090", + "step": 2603, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:22.883885", + "step": 2603, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002031368436291814, + "timestamp": "2025-09-10 02:21:22.912569", + "step": 2604, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:21:22.949939", + "step": 2604, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004424991551786661, + "timestamp": "2025-09-10 02:21:22.965400", + "step": 2605, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:22.999527", + "step": 2605, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.032836418598890305, + "timestamp": "2025-09-10 02:21:23.006892", + "step": 2606, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:21:23.042573", + "step": 2606, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030306854750961065, + "timestamp": "2025-09-10 02:21:23.055995", + "step": 2607, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:23.087563", + "step": 2607, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008659110171720386, + "timestamp": "2025-09-10 02:21:23.115941", + "step": 2608, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:23.148036", + "step": 2608, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008872836478985846, + "timestamp": "2025-09-10 02:21:23.158399", + "step": 2609, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:23.199773", + "step": 2609, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002644038759171963, + "timestamp": "2025-09-10 02:21:23.204388", + "step": 2610, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:23.242060", + "step": 2610, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008746746927499771, + "timestamp": "2025-09-10 02:21:23.249813", + "step": 2611, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:23.284421", + "step": 2611, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010678042890504003, + "timestamp": "2025-09-10 02:21:23.312325", + "step": 2612, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:23.347381", + "step": 2612, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006162055768072605, + "timestamp": "2025-09-10 02:21:23.352904", + "step": 2613, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:23.383547", + "step": 2613, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012832626700401306, + "timestamp": "2025-09-10 02:21:23.391370", + "step": 2614, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:23.423232", + "step": 2614, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0061977319419384, + "timestamp": "2025-09-10 02:21:23.430799", + "step": 2615, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:23.464416", + "step": 2615, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003420994326006621, + "timestamp": "2025-09-10 02:21:23.489618", + "step": 2616, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:23.521959", + "step": 2616, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004029420204460621, + "timestamp": "2025-09-10 02:21:23.528022", + "step": 2617, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:21:23.567525", + "step": 2617, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007904608733952045, + "timestamp": "2025-09-10 02:21:23.580897", + "step": 2618, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:23.617531", + "step": 2618, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.056298431009054184, + "timestamp": "2025-09-10 02:21:23.624513", + "step": 2619, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:21:23.657783", + "step": 2619, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04149520769715309, + "timestamp": "2025-09-10 02:21:23.682686", + "step": 2620, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:23.714333", + "step": 2620, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013901514001190662, + "timestamp": "2025-09-10 02:21:23.723015", + "step": 2621, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:23.754388", + "step": 2621, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005170899443328381, + "timestamp": "2025-09-10 02:21:23.762098", + "step": 2622, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:21:23.803781", + "step": 2622, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011854954063892365, + "timestamp": "2025-09-10 02:21:23.817179", + "step": 2623, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:23.854925", + "step": 2623, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022033916786313057, + "timestamp": "2025-09-10 02:21:23.883562", + "step": 2624, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:21:23.915994", + "step": 2624, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034682333935052156, + "timestamp": "2025-09-10 02:21:23.920954", + "step": 2625, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:23.959395", + "step": 2625, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019165745470672846, + "timestamp": "2025-09-10 02:21:23.966272", + "step": 2626, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:24.001785", + "step": 2626, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004640703264158219, + "timestamp": "2025-09-10 02:21:24.014321", + "step": 2627, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:24.045683", + "step": 2627, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022274174261838198, + "timestamp": "2025-09-10 02:21:24.073515", + "step": 2628, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:24.106052", + "step": 2628, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006261548958718777, + "timestamp": "2025-09-10 02:21:24.111226", + "step": 2629, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:24.142328", + "step": 2629, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015995798166841269, + "timestamp": "2025-09-10 02:21:24.149177", + "step": 2630, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:24.181390", + "step": 2630, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002429540967568755, + "timestamp": "2025-09-10 02:21:24.188917", + "step": 2631, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:24.219910", + "step": 2631, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00019681244157254696, + "timestamp": "2025-09-10 02:21:24.247913", + "step": 2632, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:24.280978", + "step": 2632, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007192966062575579, + "timestamp": "2025-09-10 02:21:24.283097", + "step": 2633, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:21:24.319155", + "step": 2633, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007892182096838951, + "timestamp": "2025-09-10 02:21:24.332869", + "step": 2634, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:24.365336", + "step": 2634, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011840269435197115, + "timestamp": "2025-09-10 02:21:24.372246", + "step": 2635, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:24.403970", + "step": 2635, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014801176730543375, + "timestamp": "2025-09-10 02:21:24.432277", + "step": 2636, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:24.462772", + "step": 2636, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005055623594671488, + "timestamp": "2025-09-10 02:21:24.468042", + "step": 2637, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:24.499747", + "step": 2637, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026747877709567547, + "timestamp": "2025-09-10 02:21:24.511764", + "step": 2638, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:21:24.541779", + "step": 2638, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005939009133726358, + "timestamp": "2025-09-10 02:21:24.544499", + "step": 2639, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:24.575720", + "step": 2639, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021643701940774918, + "timestamp": "2025-09-10 02:21:24.603541", + "step": 2640, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:24.634852", + "step": 2640, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01125361304730177, + "timestamp": "2025-09-10 02:21:24.639827", + "step": 2641, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:21:24.670124", + "step": 2641, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010432683862745762, + "timestamp": "2025-09-10 02:21:24.672783", + "step": 2642, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:24.703885", + "step": 2642, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003385532647371292, + "timestamp": "2025-09-10 02:21:24.710428", + "step": 2643, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:24.741330", + "step": 2643, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025549919810146093, + "timestamp": "2025-09-10 02:21:24.769360", + "step": 2644, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:24.800848", + "step": 2644, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029829232953488827, + "timestamp": "2025-09-10 02:21:24.805464", + "step": 2645, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:24.841850", + "step": 2645, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008848052239045501, + "timestamp": "2025-09-10 02:21:24.852115", + "step": 2646, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:21:35.133425", + "step": 2646, + "epoch": 2 + }, + { + "type": "pplx", + "content": 18574059.11035138, + "timestamp": "2025-09-10 02:21:35.136168", + "step": 2646, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:35.167002", + "step": 2646, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029080223757773638, + "timestamp": "2025-09-10 02:21:35.172975", + "step": 2647, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:35.204002", + "step": 2647, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013911023270338774, + "timestamp": "2025-09-10 02:21:35.236493", + "step": 2648, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:35.268728", + "step": 2648, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001914841472171247, + "timestamp": "2025-09-10 02:21:35.276255", + "step": 2649, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:35.309084", + "step": 2649, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017167022451758385, + "timestamp": "2025-09-10 02:21:35.318997", + "step": 2650, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:35.350982", + "step": 2650, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003129334654659033, + "timestamp": "2025-09-10 02:21:35.357928", + "step": 2651, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:21:35.392583", + "step": 2651, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010660120751708746, + "timestamp": "2025-09-10 02:21:35.427184", + "step": 2652, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:21:35.461172", + "step": 2652, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017405982362106442, + "timestamp": "2025-09-10 02:21:35.474502", + "step": 2653, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:35.505765", + "step": 2653, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0052419803105294704, + "timestamp": "2025-09-10 02:21:35.512620", + "step": 2654, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:35.542954", + "step": 2654, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008888996206223965, + "timestamp": "2025-09-10 02:21:35.547239", + "step": 2655, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:21:35.585764", + "step": 2655, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006996404263190925, + "timestamp": "2025-09-10 02:21:35.622348", + "step": 2656, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:21:35.653762", + "step": 2656, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013661734759807587, + "timestamp": "2025-09-10 02:21:35.655988", + "step": 2657, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:35.686175", + "step": 2657, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007183533161878586, + "timestamp": "2025-09-10 02:21:35.690706", + "step": 2658, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:35.723778", + "step": 2658, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.059785980731248856, + "timestamp": "2025-09-10 02:21:35.733799", + "step": 2659, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:35.765142", + "step": 2659, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015089567750692368, + "timestamp": "2025-09-10 02:21:35.796304", + "step": 2660, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:35.831263", + "step": 2660, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020707848016172647, + "timestamp": "2025-09-10 02:21:35.835764", + "step": 2661, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:35.873478", + "step": 2661, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033768482971936464, + "timestamp": "2025-09-10 02:21:35.882373", + "step": 2662, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:35.921734", + "step": 2662, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00554437842220068, + "timestamp": "2025-09-10 02:21:35.931603", + "step": 2663, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:35.970955", + "step": 2663, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008375253528356552, + "timestamp": "2025-09-10 02:21:36.003311", + "step": 2664, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:36.049885", + "step": 2664, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003926243167370558, + "timestamp": "2025-09-10 02:21:36.055742", + "step": 2665, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:36.101046", + "step": 2665, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011568154441192746, + "timestamp": "2025-09-10 02:21:36.108919", + "step": 2666, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:36.160206", + "step": 2666, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036792331375181675, + "timestamp": "2025-09-10 02:21:36.167431", + "step": 2667, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:36.223847", + "step": 2667, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026783072389662266, + "timestamp": "2025-09-10 02:21:36.253011", + "step": 2668, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:21:36.303802", + "step": 2668, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006417171680368483, + "timestamp": "2025-09-10 02:21:36.317108", + "step": 2669, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:21:36.378980", + "step": 2669, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01134135015308857, + "timestamp": "2025-09-10 02:21:36.392637", + "step": 2670, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:36.435220", + "step": 2670, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004530813079327345, + "timestamp": "2025-09-10 02:21:36.446214", + "step": 2671, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:36.481707", + "step": 2671, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019003379857167602, + "timestamp": "2025-09-10 02:21:36.506514", + "step": 2672, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:36.537494", + "step": 2672, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009907567873597145, + "timestamp": "2025-09-10 02:21:36.541757", + "step": 2673, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:36.575079", + "step": 2673, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017410024302080274, + "timestamp": "2025-09-10 02:21:36.582406", + "step": 2674, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:36.613848", + "step": 2674, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021735227201133966, + "timestamp": "2025-09-10 02:21:36.624645", + "step": 2675, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:36.656011", + "step": 2675, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014428169233724475, + "timestamp": "2025-09-10 02:21:36.683637", + "step": 2676, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:36.718504", + "step": 2676, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00035420857602730393, + "timestamp": "2025-09-10 02:21:36.724320", + "step": 2677, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:21:36.764578", + "step": 2677, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004614558536559343, + "timestamp": "2025-09-10 02:21:36.780152", + "step": 2678, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:36.818076", + "step": 2678, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006453771609812975, + "timestamp": "2025-09-10 02:21:36.824490", + "step": 2679, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:36.872335", + "step": 2679, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.034508321434259415, + "timestamp": "2025-09-10 02:21:36.903162", + "step": 2680, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:36.960264", + "step": 2680, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011013428680598736, + "timestamp": "2025-09-10 02:21:36.965747", + "step": 2681, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:37.008533", + "step": 2681, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015115066431462765, + "timestamp": "2025-09-10 02:21:37.015238", + "step": 2682, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:37.055473", + "step": 2682, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00047675202949903905, + "timestamp": "2025-09-10 02:21:37.060861", + "step": 2683, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:37.100431", + "step": 2683, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03187503293156624, + "timestamp": "2025-09-10 02:21:37.127892", + "step": 2684, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:37.174265", + "step": 2684, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006218705675564706, + "timestamp": "2025-09-10 02:21:37.182305", + "step": 2685, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:37.213798", + "step": 2685, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013111613225191832, + "timestamp": "2025-09-10 02:21:37.218321", + "step": 2686, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:37.248961", + "step": 2686, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007048290688544512, + "timestamp": "2025-09-10 02:21:37.255708", + "step": 2687, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:37.287452", + "step": 2687, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017233153339475393, + "timestamp": "2025-09-10 02:21:37.315254", + "step": 2688, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:37.348112", + "step": 2688, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018436602549627423, + "timestamp": "2025-09-10 02:21:37.355939", + "step": 2689, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:37.394439", + "step": 2689, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01839214749634266, + "timestamp": "2025-09-10 02:21:37.402313", + "step": 2690, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:37.437257", + "step": 2690, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006622764747589827, + "timestamp": "2025-09-10 02:21:37.444049", + "step": 2691, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:37.479721", + "step": 2691, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007068789564073086, + "timestamp": "2025-09-10 02:21:37.507432", + "step": 2692, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:37.544360", + "step": 2692, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019901886116713285, + "timestamp": "2025-09-10 02:21:37.548756", + "step": 2693, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:37.586064", + "step": 2693, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0049681165255606174, + "timestamp": "2025-09-10 02:21:37.590610", + "step": 2694, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:37.621080", + "step": 2694, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012007238110527396, + "timestamp": "2025-09-10 02:21:37.625231", + "step": 2695, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:37.655813", + "step": 2695, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008289673365652561, + "timestamp": "2025-09-10 02:21:37.681152", + "step": 2696, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:37.712672", + "step": 2696, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011843375395983458, + "timestamp": "2025-09-10 02:21:37.714804", + "step": 2697, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:37.746400", + "step": 2697, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0065173497423529625, + "timestamp": "2025-09-10 02:21:37.756744", + "step": 2698, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:21:37.787428", + "step": 2698, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00036999728763476014, + "timestamp": "2025-09-10 02:21:37.790044", + "step": 2699, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:37.827483", + "step": 2699, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01047492679208517, + "timestamp": "2025-09-10 02:21:37.855289", + "step": 2700, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:37.908537", + "step": 2700, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033648067619651556, + "timestamp": "2025-09-10 02:21:37.923693", + "step": 2701, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:38.005482", + "step": 2701, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001408770913258195, + "timestamp": "2025-09-10 02:21:38.022992", + "step": 2702, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:38.106573", + "step": 2702, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011482964269816875, + "timestamp": "2025-09-10 02:21:38.117121", + "step": 2703, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:38.165908", + "step": 2703, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008962144493125379, + "timestamp": "2025-09-10 02:21:38.204532", + "step": 2704, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:38.289760", + "step": 2704, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022653231862932444, + "timestamp": "2025-09-10 02:21:38.294943", + "step": 2705, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:38.365940", + "step": 2705, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013882993720471859, + "timestamp": "2025-09-10 02:21:38.383012", + "step": 2706, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:38.457044", + "step": 2706, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001253266236744821, + "timestamp": "2025-09-10 02:21:38.464033", + "step": 2707, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:38.543788", + "step": 2707, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013703681761398911, + "timestamp": "2025-09-10 02:21:38.575094", + "step": 2708, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:38.622236", + "step": 2708, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033048451878130436, + "timestamp": "2025-09-10 02:21:38.630211", + "step": 2709, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:38.675604", + "step": 2709, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006388475303538144, + "timestamp": "2025-09-10 02:21:38.682726", + "step": 2710, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:38.721884", + "step": 2710, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018123749643564224, + "timestamp": "2025-09-10 02:21:38.729692", + "step": 2711, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:38.783696", + "step": 2711, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003044202458113432, + "timestamp": "2025-09-10 02:21:38.816789", + "step": 2712, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:21:38.862713", + "step": 2712, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013637479860335588, + "timestamp": "2025-09-10 02:21:38.875394", + "step": 2713, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:38.929319", + "step": 2713, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018133390694856644, + "timestamp": "2025-09-10 02:21:38.940358", + "step": 2714, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 19459015502528 + }, + "timestamp": "2025-09-10 02:21:39.009625", + "step": 2714, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01170498225837946, + "timestamp": "2025-09-10 02:21:39.033076", + "step": 2715, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:39.075710", + "step": 2715, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006123408675193787, + "timestamp": "2025-09-10 02:21:39.103674", + "step": 2716, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:39.153884", + "step": 2716, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004910778952762485, + "timestamp": "2025-09-10 02:21:39.159177", + "step": 2717, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:39.204546", + "step": 2717, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005400904337875545, + "timestamp": "2025-09-10 02:21:39.212357", + "step": 2718, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:39.265552", + "step": 2718, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02437109872698784, + "timestamp": "2025-09-10 02:21:39.272686", + "step": 2719, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:39.311229", + "step": 2719, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003212881973013282, + "timestamp": "2025-09-10 02:21:39.339010", + "step": 2720, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:39.389143", + "step": 2720, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004050101153552532, + "timestamp": "2025-09-10 02:21:39.399713", + "step": 2721, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:39.441377", + "step": 2721, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005341669311746955, + "timestamp": "2025-09-10 02:21:39.448999", + "step": 2722, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:39.488414", + "step": 2722, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020567751489579678, + "timestamp": "2025-09-10 02:21:39.496393", + "step": 2723, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:39.534121", + "step": 2723, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017389410641044378, + "timestamp": "2025-09-10 02:21:39.562692", + "step": 2724, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:39.640851", + "step": 2724, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.047998156398534775, + "timestamp": "2025-09-10 02:21:39.658482", + "step": 2725, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:39.726652", + "step": 2725, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006225144607014954, + "timestamp": "2025-09-10 02:21:39.743044", + "step": 2726, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:39.810940", + "step": 2726, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033620852045714855, + "timestamp": "2025-09-10 02:21:39.827193", + "step": 2727, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:39.890724", + "step": 2727, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009153550490736961, + "timestamp": "2025-09-10 02:21:39.915738", + "step": 2728, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:39.965315", + "step": 2728, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016450297553092241, + "timestamp": "2025-09-10 02:21:39.973894", + "step": 2729, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:40.010673", + "step": 2729, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00031730628688819706, + "timestamp": "2025-09-10 02:21:40.017831", + "step": 2730, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:40.051540", + "step": 2730, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05042002350091934, + "timestamp": "2025-09-10 02:21:40.059029", + "step": 2731, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:40.097441", + "step": 2731, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009090257226489484, + "timestamp": "2025-09-10 02:21:40.125225", + "step": 2732, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:40.168073", + "step": 2732, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011402477510273457, + "timestamp": "2025-09-10 02:21:40.178050", + "step": 2733, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 15187581968384 + }, + "timestamp": "2025-09-10 02:21:40.236822", + "step": 2733, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006502915173768997, + "timestamp": "2025-09-10 02:21:40.254504", + "step": 2734, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:40.294820", + "step": 2734, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04736243933439255, + "timestamp": "2025-09-10 02:21:40.301645", + "step": 2735, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:40.339970", + "step": 2735, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024216361343860626, + "timestamp": "2025-09-10 02:21:40.367094", + "step": 2736, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:40.400543", + "step": 2736, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004260449670255184, + "timestamp": "2025-09-10 02:21:40.404660", + "step": 2737, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:40.436564", + "step": 2737, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016414711717516184, + "timestamp": "2025-09-10 02:21:40.442927", + "step": 2738, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:40.473988", + "step": 2738, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007454793085344136, + "timestamp": "2025-09-10 02:21:40.480752", + "step": 2739, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:40.511868", + "step": 2739, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025238368660211563, + "timestamp": "2025-09-10 02:21:40.539369", + "step": 2740, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:40.573420", + "step": 2740, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014565506717190146, + "timestamp": "2025-09-10 02:21:40.581040", + "step": 2741, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:21:40.614852", + "step": 2741, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028659238014370203, + "timestamp": "2025-09-10 02:21:40.628243", + "step": 2742, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:40.659481", + "step": 2742, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005315977614372969, + "timestamp": "2025-09-10 02:21:40.666068", + "step": 2743, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:40.697352", + "step": 2743, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011978724505752325, + "timestamp": "2025-09-10 02:21:40.725029", + "step": 2744, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:21:40.757583", + "step": 2744, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008306491072289646, + "timestamp": "2025-09-10 02:21:40.770565", + "step": 2745, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:40.801906", + "step": 2745, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002079846104606986, + "timestamp": "2025-09-10 02:21:40.808974", + "step": 2746, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:40.840990", + "step": 2746, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013913876377046108, + "timestamp": "2025-09-10 02:21:40.845922", + "step": 2747, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:40.884272", + "step": 2747, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014017752837389708, + "timestamp": "2025-09-10 02:21:40.915824", + "step": 2748, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:40.952577", + "step": 2748, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018380844965577126, + "timestamp": "2025-09-10 02:21:40.957550", + "step": 2749, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:40.991539", + "step": 2749, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04001007229089737, + "timestamp": "2025-09-10 02:21:41.001708", + "step": 2750, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:41.039352", + "step": 2750, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020031663589179516, + "timestamp": "2025-09-10 02:21:41.049458", + "step": 2751, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:41.089957", + "step": 2751, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007604836719110608, + "timestamp": "2025-09-10 02:21:41.118330", + "step": 2752, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:41.155487", + "step": 2752, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003465786576271057, + "timestamp": "2025-09-10 02:21:41.165491", + "step": 2753, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:41.202972", + "step": 2753, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002132646186510101, + "timestamp": "2025-09-10 02:21:41.209606", + "step": 2754, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:41.248213", + "step": 2754, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009989741956815124, + "timestamp": "2025-09-10 02:21:41.255377", + "step": 2755, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:41.297555", + "step": 2755, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006549767684191465, + "timestamp": "2025-09-10 02:21:41.325684", + "step": 2756, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:41.364478", + "step": 2756, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001987830735743046, + "timestamp": "2025-09-10 02:21:41.366592", + "step": 2757, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:41.402406", + "step": 2757, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006204267032444477, + "timestamp": "2025-09-10 02:21:41.408956", + "step": 2758, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:41.446821", + "step": 2758, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01345762424170971, + "timestamp": "2025-09-10 02:21:41.456689", + "step": 2759, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:41.491228", + "step": 2759, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002160031348466873, + "timestamp": "2025-09-10 02:21:41.524617", + "step": 2760, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:41.555466", + "step": 2760, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004899486084468663, + "timestamp": "2025-09-10 02:21:41.557921", + "step": 2761, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:41.588416", + "step": 2761, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002803497016429901, + "timestamp": "2025-09-10 02:21:41.595467", + "step": 2762, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:41.625790", + "step": 2762, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015402629505842924, + "timestamp": "2025-09-10 02:21:41.632695", + "step": 2763, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:41.663176", + "step": 2763, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005135064013302326, + "timestamp": "2025-09-10 02:21:41.691459", + "step": 2764, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:41.722179", + "step": 2764, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008388920687139034, + "timestamp": "2025-09-10 02:21:41.731503", + "step": 2765, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:41.763229", + "step": 2765, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001156167476437986, + "timestamp": "2025-09-10 02:21:41.775466", + "step": 2766, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:41.808417", + "step": 2766, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003134796628728509, + "timestamp": "2025-09-10 02:21:41.815705", + "step": 2767, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:41.847048", + "step": 2767, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03722445294260979, + "timestamp": "2025-09-10 02:21:41.874747", + "step": 2768, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:41.906579", + "step": 2768, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030981996096670628, + "timestamp": "2025-09-10 02:21:41.910934", + "step": 2769, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:41.941649", + "step": 2769, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026767630130052567, + "timestamp": "2025-09-10 02:21:41.945986", + "step": 2770, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:41.977046", + "step": 2770, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004015625920146704, + "timestamp": "2025-09-10 02:21:41.983688", + "step": 2771, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:42.014899", + "step": 2771, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01961735263466835, + "timestamp": "2025-09-10 02:21:42.042822", + "step": 2772, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:42.074168", + "step": 2772, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004195111338049173, + "timestamp": "2025-09-10 02:21:42.081567", + "step": 2773, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:42.113471", + "step": 2773, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00503236660733819, + "timestamp": "2025-09-10 02:21:42.123489", + "step": 2774, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:42.155417", + "step": 2774, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00502787483856082, + "timestamp": "2025-09-10 02:21:42.162981", + "step": 2775, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:42.194347", + "step": 2775, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024155091494321823, + "timestamp": "2025-09-10 02:21:42.225406", + "step": 2776, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:42.256407", + "step": 2776, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02515154518187046, + "timestamp": "2025-09-10 02:21:42.264155", + "step": 2777, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:42.295541", + "step": 2777, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018512414768338203, + "timestamp": "2025-09-10 02:21:42.306224", + "step": 2778, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:42.338151", + "step": 2778, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006325080059468746, + "timestamp": "2025-09-10 02:21:42.349127", + "step": 2779, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:42.385992", + "step": 2779, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009645558893680573, + "timestamp": "2025-09-10 02:21:42.417123", + "step": 2780, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:42.447826", + "step": 2780, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004604689311236143, + "timestamp": "2025-09-10 02:21:42.453228", + "step": 2781, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:42.483632", + "step": 2781, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004039444029331207, + "timestamp": "2025-09-10 02:21:42.490568", + "step": 2782, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:42.520587", + "step": 2782, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015288168797269464, + "timestamp": "2025-09-10 02:21:42.528202", + "step": 2783, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:42.559405", + "step": 2783, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004764964338392019, + "timestamp": "2025-09-10 02:21:42.584432", + "step": 2784, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:42.615196", + "step": 2784, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001542671350762248, + "timestamp": "2025-09-10 02:21:42.623018", + "step": 2785, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:42.654012", + "step": 2785, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000828076503239572, + "timestamp": "2025-09-10 02:21:42.665094", + "step": 2786, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:42.696485", + "step": 2786, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006047695060260594, + "timestamp": "2025-09-10 02:21:42.706673", + "step": 2787, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:42.737739", + "step": 2787, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002101297490298748, + "timestamp": "2025-09-10 02:21:42.770979", + "step": 2788, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:21:42.803580", + "step": 2788, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025870150420814753, + "timestamp": "2025-09-10 02:21:42.806965", + "step": 2789, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:42.841421", + "step": 2789, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028584027662873268, + "timestamp": "2025-09-10 02:21:42.848544", + "step": 2790, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:42.887114", + "step": 2790, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.035170648247003555, + "timestamp": "2025-09-10 02:21:42.893869", + "step": 2791, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:42.929384", + "step": 2791, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014619800494983792, + "timestamp": "2025-09-10 02:21:42.957717", + "step": 2792, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:42.995938", + "step": 2792, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01359619665890932, + "timestamp": "2025-09-10 02:21:43.001446", + "step": 2793, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:21:53.465801", + "step": 2793, + "epoch": 2 + }, + { + "type": "pplx", + "content": 19197019.4612857, + "timestamp": "2025-09-10 02:21:53.469410", + "step": 2793, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:53.499938", + "step": 2793, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000690083543304354, + "timestamp": "2025-09-10 02:21:53.510007", + "step": 2794, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:21:53.544004", + "step": 2794, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.056752197444438934, + "timestamp": "2025-09-10 02:21:53.557315", + "step": 2795, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:53.588933", + "step": 2795, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0039436123333871365, + "timestamp": "2025-09-10 02:21:53.616773", + "step": 2796, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:53.647612", + "step": 2796, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003669754136353731, + "timestamp": "2025-09-10 02:21:53.653063", + "step": 2797, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:53.684749", + "step": 2797, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009105192148126662, + "timestamp": "2025-09-10 02:21:53.692063", + "step": 2798, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:53.723340", + "step": 2798, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012229084968566895, + "timestamp": "2025-09-10 02:21:53.727670", + "step": 2799, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:53.757906", + "step": 2799, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010326796909794211, + "timestamp": "2025-09-10 02:21:53.783340", + "step": 2800, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:53.814232", + "step": 2800, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015513862483203411, + "timestamp": "2025-09-10 02:21:53.816480", + "step": 2801, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:21:53.851680", + "step": 2801, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022338945418596268, + "timestamp": "2025-09-10 02:21:53.865695", + "step": 2802, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:53.898345", + "step": 2802, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012829114682972431, + "timestamp": "2025-09-10 02:21:53.905512", + "step": 2803, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:53.935544", + "step": 2803, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004405899439007044, + "timestamp": "2025-09-10 02:21:53.961305", + "step": 2804, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:54.000565", + "step": 2804, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013872667914256454, + "timestamp": "2025-09-10 02:21:54.011093", + "step": 2805, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:54.046640", + "step": 2805, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005773500073701143, + "timestamp": "2025-09-10 02:21:54.057600", + "step": 2806, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:54.092646", + "step": 2806, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010280570015311241, + "timestamp": "2025-09-10 02:21:54.099713", + "step": 2807, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:54.134369", + "step": 2807, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03476468473672867, + "timestamp": "2025-09-10 02:21:54.162308", + "step": 2808, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:54.196654", + "step": 2808, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031658527441322803, + "timestamp": "2025-09-10 02:21:54.198954", + "step": 2809, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:54.232450", + "step": 2809, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006831489037722349, + "timestamp": "2025-09-10 02:21:54.237018", + "step": 2810, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:54.278093", + "step": 2810, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033258756157010794, + "timestamp": "2025-09-10 02:21:54.290608", + "step": 2811, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:54.328729", + "step": 2811, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024213241413235664, + "timestamp": "2025-09-10 02:21:54.359819", + "step": 2812, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:54.393296", + "step": 2812, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003739068517461419, + "timestamp": "2025-09-10 02:21:54.398570", + "step": 2813, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:54.449409", + "step": 2813, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015904037281870842, + "timestamp": "2025-09-10 02:21:54.457159", + "step": 2814, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:54.494234", + "step": 2814, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012923607137054205, + "timestamp": "2025-09-10 02:21:54.498995", + "step": 2815, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:54.535980", + "step": 2815, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011023187544196844, + "timestamp": "2025-09-10 02:21:54.564641", + "step": 2816, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:54.599843", + "step": 2816, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005850085057318211, + "timestamp": "2025-09-10 02:21:54.604642", + "step": 2817, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:54.642705", + "step": 2817, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006922147236764431, + "timestamp": "2025-09-10 02:21:54.647104", + "step": 2818, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:54.682379", + "step": 2818, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006881711073219776, + "timestamp": "2025-09-10 02:21:54.688316", + "step": 2819, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:21:54.724767", + "step": 2819, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002905226079747081, + "timestamp": "2025-09-10 02:21:54.759067", + "step": 2820, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:54.796273", + "step": 2820, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.042592164129018784, + "timestamp": "2025-09-10 02:21:54.804964", + "step": 2821, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:54.847161", + "step": 2821, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0068238540552556515, + "timestamp": "2025-09-10 02:21:54.853508", + "step": 2822, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:54.891239", + "step": 2822, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02196827158331871, + "timestamp": "2025-09-10 02:21:54.898080", + "step": 2823, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:54.934244", + "step": 2823, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007932315580546856, + "timestamp": "2025-09-10 02:21:54.962639", + "step": 2824, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:54.994981", + "step": 2824, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009902574121952057, + "timestamp": "2025-09-10 02:21:55.003516", + "step": 2825, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:55.035712", + "step": 2825, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023097884841263294, + "timestamp": "2025-09-10 02:21:55.046003", + "step": 2826, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:55.076515", + "step": 2826, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010782705619931221, + "timestamp": "2025-09-10 02:21:55.083561", + "step": 2827, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:55.113817", + "step": 2827, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004876940976828337, + "timestamp": "2025-09-10 02:21:55.145150", + "step": 2828, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:21:55.174739", + "step": 2828, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016192414332181215, + "timestamp": "2025-09-10 02:21:55.176954", + "step": 2829, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:55.207595", + "step": 2829, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013937892392277718, + "timestamp": "2025-09-10 02:21:55.214827", + "step": 2830, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:55.245059", + "step": 2830, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003963314928114414, + "timestamp": "2025-09-10 02:21:55.252530", + "step": 2831, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:55.283769", + "step": 2831, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018773654475808144, + "timestamp": "2025-09-10 02:21:55.308637", + "step": 2832, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:55.340079", + "step": 2832, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017363855615258217, + "timestamp": "2025-09-10 02:21:55.344371", + "step": 2833, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:55.374762", + "step": 2833, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003922105301171541, + "timestamp": "2025-09-10 02:21:55.381507", + "step": 2834, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:55.412237", + "step": 2834, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01441988069564104, + "timestamp": "2025-09-10 02:21:55.419228", + "step": 2835, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:55.456602", + "step": 2835, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016393983736634254, + "timestamp": "2025-09-10 02:21:55.487947", + "step": 2836, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:55.520112", + "step": 2836, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013055962044745684, + "timestamp": "2025-09-10 02:21:55.522406", + "step": 2837, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:55.553310", + "step": 2837, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002322630723938346, + "timestamp": "2025-09-10 02:21:55.560592", + "step": 2838, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:55.591278", + "step": 2838, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012900998117402196, + "timestamp": "2025-09-10 02:21:55.599034", + "step": 2839, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:55.631620", + "step": 2839, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004327766597270966, + "timestamp": "2025-09-10 02:21:55.659512", + "step": 2840, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:55.692246", + "step": 2840, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029336088337004185, + "timestamp": "2025-09-10 02:21:55.699962", + "step": 2841, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:55.731399", + "step": 2841, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008453912101686, + "timestamp": "2025-09-10 02:21:55.735812", + "step": 2842, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:21:55.774425", + "step": 2842, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00400108378380537, + "timestamp": "2025-09-10 02:21:55.790113", + "step": 2843, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:55.819970", + "step": 2843, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001131609664298594, + "timestamp": "2025-09-10 02:21:55.847859", + "step": 2844, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:55.883692", + "step": 2844, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04177376627922058, + "timestamp": "2025-09-10 02:21:55.887997", + "step": 2845, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:55.926164", + "step": 2845, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013375887647271156, + "timestamp": "2025-09-10 02:21:55.934060", + "step": 2846, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:21:55.979412", + "step": 2846, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.034866467118263245, + "timestamp": "2025-09-10 02:21:55.995621", + "step": 2847, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:56.040186", + "step": 2847, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008052507415413857, + "timestamp": "2025-09-10 02:21:56.065565", + "step": 2848, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:21:56.106829", + "step": 2848, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027601835317909718, + "timestamp": "2025-09-10 02:21:56.119851", + "step": 2849, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:56.159735", + "step": 2849, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0046163699589669704, + "timestamp": "2025-09-10 02:21:56.167667", + "step": 2850, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:56.204477", + "step": 2850, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024019996635615826, + "timestamp": "2025-09-10 02:21:56.208674", + "step": 2851, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:56.250504", + "step": 2851, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00619547301903367, + "timestamp": "2025-09-10 02:21:56.278254", + "step": 2852, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:21:56.309613", + "step": 2852, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011628863401710987, + "timestamp": "2025-09-10 02:21:56.311783", + "step": 2853, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:21:56.341612", + "step": 2853, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011419777758419514, + "timestamp": "2025-09-10 02:21:56.343838", + "step": 2854, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:21:56.382442", + "step": 2854, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012729802168905735, + "timestamp": "2025-09-10 02:21:56.398256", + "step": 2855, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:21:56.429465", + "step": 2855, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015720551600679755, + "timestamp": "2025-09-10 02:21:56.453669", + "step": 2856, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:56.484715", + "step": 2856, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014717082493007183, + "timestamp": "2025-09-10 02:21:56.488184", + "step": 2857, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:56.518013", + "step": 2857, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003454964840784669, + "timestamp": "2025-09-10 02:21:56.525556", + "step": 2858, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:56.556246", + "step": 2858, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002767723286524415, + "timestamp": "2025-09-10 02:21:56.566504", + "step": 2859, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:56.596507", + "step": 2859, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012848809361457825, + "timestamp": "2025-09-10 02:21:56.624554", + "step": 2860, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:56.655309", + "step": 2860, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015174107626080513, + "timestamp": "2025-09-10 02:21:56.663342", + "step": 2861, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:21:56.697483", + "step": 2861, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006852737162262201, + "timestamp": "2025-09-10 02:21:56.711212", + "step": 2862, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:56.742102", + "step": 2862, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016556836664676666, + "timestamp": "2025-09-10 02:21:56.749989", + "step": 2863, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:56.780543", + "step": 2863, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012003665789961815, + "timestamp": "2025-09-10 02:21:56.809224", + "step": 2864, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:56.840636", + "step": 2864, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033726885449141264, + "timestamp": "2025-09-10 02:21:56.846253", + "step": 2865, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:56.878932", + "step": 2865, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001546733663417399, + "timestamp": "2025-09-10 02:21:56.891219", + "step": 2866, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:56.925077", + "step": 2866, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018309442326426506, + "timestamp": "2025-09-10 02:21:56.937635", + "step": 2867, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:21:56.976009", + "step": 2867, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005003686994314194, + "timestamp": "2025-09-10 02:21:57.011500", + "step": 2868, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:57.050458", + "step": 2868, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01586066372692585, + "timestamp": "2025-09-10 02:21:57.058502", + "step": 2869, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:57.094025", + "step": 2869, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022168749943375587, + "timestamp": "2025-09-10 02:21:57.101730", + "step": 2870, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:21:57.145405", + "step": 2870, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011965368874371052, + "timestamp": "2025-09-10 02:21:57.161292", + "step": 2871, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:57.202765", + "step": 2871, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0080100167542696, + "timestamp": "2025-09-10 02:21:57.230608", + "step": 2872, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:21:57.269690", + "step": 2872, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004915738943964243, + "timestamp": "2025-09-10 02:21:57.282756", + "step": 2873, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:21:57.324127", + "step": 2873, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.036590684205293655, + "timestamp": "2025-09-10 02:21:57.337518", + "step": 2874, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:57.378714", + "step": 2874, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016932787373661995, + "timestamp": "2025-09-10 02:21:57.383194", + "step": 2875, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:57.421402", + "step": 2875, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002477414207533002, + "timestamp": "2025-09-10 02:21:57.453426", + "step": 2876, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:57.493622", + "step": 2876, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009233239106833935, + "timestamp": "2025-09-10 02:21:57.497109", + "step": 2877, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:57.531367", + "step": 2877, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004387423861771822, + "timestamp": "2025-09-10 02:21:57.538838", + "step": 2878, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:57.576986", + "step": 2878, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022044102661311626, + "timestamp": "2025-09-10 02:21:57.581193", + "step": 2879, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:57.620656", + "step": 2879, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003344293450936675, + "timestamp": "2025-09-10 02:21:57.653820", + "step": 2880, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:57.688571", + "step": 2880, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033048386685550213, + "timestamp": "2025-09-10 02:21:57.693903", + "step": 2881, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:21:57.729127", + "step": 2881, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013956844806671143, + "timestamp": "2025-09-10 02:21:57.733657", + "step": 2882, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:57.772898", + "step": 2882, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017580043524503708, + "timestamp": "2025-09-10 02:21:57.780655", + "step": 2883, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:57.823983", + "step": 2883, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006970468442887068, + "timestamp": "2025-09-10 02:21:57.852000", + "step": 2884, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:57.892963", + "step": 2884, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002934870542958379, + "timestamp": "2025-09-10 02:21:57.901055", + "step": 2885, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:57.938969", + "step": 2885, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011385561665520072, + "timestamp": "2025-09-10 02:21:57.946648", + "step": 2886, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:57.999025", + "step": 2886, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010981320403516293, + "timestamp": "2025-09-10 02:21:58.010261", + "step": 2887, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:58.064350", + "step": 2887, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01800290308892727, + "timestamp": "2025-09-10 02:21:58.096941", + "step": 2888, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:58.139621", + "step": 2888, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00524926045909524, + "timestamp": "2025-09-10 02:21:58.147001", + "step": 2889, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:21:58.205833", + "step": 2889, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004338169004768133, + "timestamp": "2025-09-10 02:21:58.223203", + "step": 2890, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:58.268415", + "step": 2890, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01495091337710619, + "timestamp": "2025-09-10 02:21:58.275632", + "step": 2891, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:58.305991", + "step": 2891, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038837611209601164, + "timestamp": "2025-09-10 02:21:58.331474", + "step": 2892, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 688 + ], + "flops": 20408222954560 + }, + "timestamp": "2025-09-10 02:21:58.386481", + "step": 2892, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001870313542895019, + "timestamp": "2025-09-10 02:21:58.410780", + "step": 2893, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:21:58.441343", + "step": 2893, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013324370374903083, + "timestamp": "2025-09-10 02:21:58.444028", + "step": 2894, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:58.474641", + "step": 2894, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003104160074144602, + "timestamp": "2025-09-10 02:21:58.482522", + "step": 2895, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:58.514154", + "step": 2895, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025289487093687057, + "timestamp": "2025-09-10 02:21:58.542545", + "step": 2896, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:21:58.573421", + "step": 2896, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017517339438199997, + "timestamp": "2025-09-10 02:21:58.575491", + "step": 2897, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:58.606654", + "step": 2897, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004530445206910372, + "timestamp": "2025-09-10 02:21:58.618786", + "step": 2898, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:21:58.650642", + "step": 2898, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009508281364105642, + "timestamp": "2025-09-10 02:21:58.658545", + "step": 2899, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:58.689706", + "step": 2899, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012976233847439289, + "timestamp": "2025-09-10 02:21:58.717747", + "step": 2900, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:21:58.754243", + "step": 2900, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002717123832553625, + "timestamp": "2025-09-10 02:21:58.769406", + "step": 2901, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:21:58.800524", + "step": 2901, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019545141607522964, + "timestamp": "2025-09-10 02:21:58.807986", + "step": 2902, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 560 + ], + "flops": 16611393146432 + }, + "timestamp": "2025-09-10 02:21:58.854567", + "step": 2902, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025809798389673233, + "timestamp": "2025-09-10 02:21:58.873979", + "step": 2903, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:58.910932", + "step": 2903, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010793408146128058, + "timestamp": "2025-09-10 02:21:58.939062", + "step": 2904, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:58.969530", + "step": 2904, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002281660446897149, + "timestamp": "2025-09-10 02:21:58.977402", + "step": 2905, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:59.008417", + "step": 2905, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008847257122397423, + "timestamp": "2025-09-10 02:21:59.015401", + "step": 2906, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:59.051422", + "step": 2906, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00565936928614974, + "timestamp": "2025-09-10 02:21:59.055983", + "step": 2907, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:21:59.094239", + "step": 2907, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032904818654060364, + "timestamp": "2025-09-10 02:21:59.127238", + "step": 2908, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:59.158629", + "step": 2908, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015532250981777906, + "timestamp": "2025-09-10 02:21:59.168322", + "step": 2909, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:59.200228", + "step": 2909, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015883222222328186, + "timestamp": "2025-09-10 02:21:59.207380", + "step": 2910, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:59.238162", + "step": 2910, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01465687807649374, + "timestamp": "2025-09-10 02:21:59.250384", + "step": 2911, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:59.285448", + "step": 2911, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000894768163561821, + "timestamp": "2025-09-10 02:21:59.310633", + "step": 2912, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:59.341168", + "step": 2912, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001928298850543797, + "timestamp": "2025-09-10 02:21:59.343617", + "step": 2913, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:59.375092", + "step": 2913, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025657066144049168, + "timestamp": "2025-09-10 02:21:59.379746", + "step": 2914, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:21:59.414770", + "step": 2914, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007942708325572312, + "timestamp": "2025-09-10 02:21:59.428803", + "step": 2915, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:59.461407", + "step": 2915, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012047487311065197, + "timestamp": "2025-09-10 02:21:59.486808", + "step": 2916, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:21:59.527655", + "step": 2916, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030188219621777534, + "timestamp": "2025-09-10 02:21:59.532533", + "step": 2917, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:21:59.570354", + "step": 2917, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02199845388531685, + "timestamp": "2025-09-10 02:21:59.577441", + "step": 2918, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:21:59.614234", + "step": 2918, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010863765142858028, + "timestamp": "2025-09-10 02:21:59.625220", + "step": 2919, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:21:59.669965", + "step": 2919, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012042339658364654, + "timestamp": "2025-09-10 02:21:59.704651", + "step": 2920, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:59.735315", + "step": 2920, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04105643555521965, + "timestamp": "2025-09-10 02:21:59.737634", + "step": 2921, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:21:59.767694", + "step": 2921, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00044288174831308424, + "timestamp": "2025-09-10 02:21:59.770211", + "step": 2922, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:21:59.800706", + "step": 2922, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006732792127877474, + "timestamp": "2025-09-10 02:21:59.811614", + "step": 2923, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:21:59.841546", + "step": 2923, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017994015943259, + "timestamp": "2025-09-10 02:21:59.869331", + "step": 2924, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:21:59.900936", + "step": 2924, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027029775083065033, + "timestamp": "2025-09-10 02:21:59.910460", + "step": 2925, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:21:59.956229", + "step": 2925, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005239599384367466, + "timestamp": "2025-09-10 02:21:59.960170", + "step": 2926, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:21:59.994080", + "step": 2926, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006697875447571278, + "timestamp": "2025-09-10 02:22:00.004375", + "step": 2927, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:00.038069", + "step": 2927, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03148489445447922, + "timestamp": "2025-09-10 02:22:00.063041", + "step": 2928, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:00.094406", + "step": 2928, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020124191418290138, + "timestamp": "2025-09-10 02:22:00.099254", + "step": 2929, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:22:00.140652", + "step": 2929, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010933955200016499, + "timestamp": "2025-09-10 02:22:00.157710", + "step": 2930, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:00.190255", + "step": 2930, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013913453556597233, + "timestamp": "2025-09-10 02:22:00.202665", + "step": 2931, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:00.234784", + "step": 2931, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003610015381127596, + "timestamp": "2025-09-10 02:22:00.262635", + "step": 2932, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:00.293763", + "step": 2932, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001940641668625176, + "timestamp": "2025-09-10 02:22:00.298029", + "step": 2933, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:00.330128", + "step": 2933, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037323671858757734, + "timestamp": "2025-09-10 02:22:00.340931", + "step": 2934, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:22:00.374918", + "step": 2934, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01665830798447132, + "timestamp": "2025-09-10 02:22:00.388319", + "step": 2935, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:00.419887", + "step": 2935, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005614429712295532, + "timestamp": "2025-09-10 02:22:00.447847", + "step": 2936, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:00.479376", + "step": 2936, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027582976035773754, + "timestamp": "2025-09-10 02:22:00.487368", + "step": 2937, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:00.518110", + "step": 2937, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003742832690477371, + "timestamp": "2025-09-10 02:22:00.525908", + "step": 2938, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:00.556958", + "step": 2938, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.039867255836725235, + "timestamp": "2025-09-10 02:22:00.561533", + "step": 2939, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:00.593247", + "step": 2939, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0062867277301847935, + "timestamp": "2025-09-10 02:22:00.618271", + "step": 2940, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:22:10.925419", + "step": 2940, + "epoch": 2 + }, + { + "type": "pplx", + "content": 20450711.8035112, + "timestamp": "2025-09-10 02:22:10.928876", + "step": 2940, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:10.958777", + "step": 2940, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00988749973475933, + "timestamp": "2025-09-10 02:22:10.967420", + "step": 2941, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:10.999190", + "step": 2941, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008435306954197586, + "timestamp": "2025-09-10 02:22:11.006091", + "step": 2942, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:11.036991", + "step": 2942, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0046732001937925816, + "timestamp": "2025-09-10 02:22:11.047700", + "step": 2943, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:11.079822", + "step": 2943, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01026154775172472, + "timestamp": "2025-09-10 02:22:11.107510", + "step": 2944, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:11.138368", + "step": 2944, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024215218145400286, + "timestamp": "2025-09-10 02:22:11.143757", + "step": 2945, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 816 + ], + "flops": 24205052762688 + }, + "timestamp": "2025-09-10 02:22:11.212348", + "step": 2945, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000555099977646023, + "timestamp": "2025-09-10 02:22:11.240841", + "step": 2946, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:11.270823", + "step": 2946, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003919025417417288, + "timestamp": "2025-09-10 02:22:11.278728", + "step": 2947, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:11.309682", + "step": 2947, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007654453511349857, + "timestamp": "2025-09-10 02:22:11.340662", + "step": 2948, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:11.373858", + "step": 2948, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007311842869967222, + "timestamp": "2025-09-10 02:22:11.383725", + "step": 2949, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:11.414295", + "step": 2949, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015561177860945463, + "timestamp": "2025-09-10 02:22:11.418689", + "step": 2950, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:11.449620", + "step": 2950, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003131111618131399, + "timestamp": "2025-09-10 02:22:11.457383", + "step": 2951, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:11.487810", + "step": 2951, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004036908969283104, + "timestamp": "2025-09-10 02:22:11.513200", + "step": 2952, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:22:11.546861", + "step": 2952, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003508640918880701, + "timestamp": "2025-09-10 02:22:11.560205", + "step": 2953, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:11.590549", + "step": 2953, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005527927540242672, + "timestamp": "2025-09-10 02:22:11.597845", + "step": 2954, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:11.630810", + "step": 2954, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013634276576340199, + "timestamp": "2025-09-10 02:22:11.638282", + "step": 2955, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:11.669259", + "step": 2955, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004053633194416761, + "timestamp": "2025-09-10 02:22:11.697820", + "step": 2956, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:11.728460", + "step": 2956, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004635666497051716, + "timestamp": "2025-09-10 02:22:11.733519", + "step": 2957, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:11.763929", + "step": 2957, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001686741947196424, + "timestamp": "2025-09-10 02:22:11.770938", + "step": 2958, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:11.802254", + "step": 2958, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013765445910394192, + "timestamp": "2025-09-10 02:22:11.809208", + "step": 2959, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:11.839864", + "step": 2959, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005294339498504996, + "timestamp": "2025-09-10 02:22:11.872901", + "step": 2960, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:11.904098", + "step": 2960, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012235159985721111, + "timestamp": "2025-09-10 02:22:11.914672", + "step": 2961, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:22:11.951422", + "step": 2961, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004377015866339207, + "timestamp": "2025-09-10 02:22:11.965222", + "step": 2962, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:11.996458", + "step": 2962, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006293201586231589, + "timestamp": "2025-09-10 02:22:12.003260", + "step": 2963, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:12.036249", + "step": 2963, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035972814075648785, + "timestamp": "2025-09-10 02:22:12.068062", + "step": 2964, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:12.107853", + "step": 2964, + "epoch": 2 + }, + { + "type": "loss", + "content": 8.064762369031087e-05, + "timestamp": "2025-09-10 02:22:12.110062", + "step": 2965, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:12.149792", + "step": 2965, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006316312937997282, + "timestamp": "2025-09-10 02:22:12.157021", + "step": 2966, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:12.189130", + "step": 2966, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010401438921689987, + "timestamp": "2025-09-10 02:22:12.196795", + "step": 2967, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:12.235983", + "step": 2967, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016096774488687515, + "timestamp": "2025-09-10 02:22:12.261540", + "step": 2968, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:12.295464", + "step": 2968, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002507053781300783, + "timestamp": "2025-09-10 02:22:12.303329", + "step": 2969, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:22:12.337886", + "step": 2969, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009550213580951095, + "timestamp": "2025-09-10 02:22:12.351270", + "step": 2970, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:12.382407", + "step": 2970, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00042923627188429236, + "timestamp": "2025-09-10 02:22:12.394518", + "step": 2971, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:12.426545", + "step": 2971, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001259053940884769, + "timestamp": "2025-09-10 02:22:12.451335", + "step": 2972, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:22:12.484720", + "step": 2972, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0048403749242424965, + "timestamp": "2025-09-10 02:22:12.497725", + "step": 2973, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:12.531127", + "step": 2973, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005875090602785349, + "timestamp": "2025-09-10 02:22:12.538186", + "step": 2974, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:12.570319", + "step": 2974, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00015017333498690277, + "timestamp": "2025-09-10 02:22:12.577711", + "step": 2975, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:12.608608", + "step": 2975, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01487821340560913, + "timestamp": "2025-09-10 02:22:12.637276", + "step": 2976, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:12.670601", + "step": 2976, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001285710313823074, + "timestamp": "2025-09-10 02:22:12.677586", + "step": 2977, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:12.709280", + "step": 2977, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004458011593669653, + "timestamp": "2025-09-10 02:22:12.720274", + "step": 2978, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:12.755600", + "step": 2978, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002914820215664804, + "timestamp": "2025-09-10 02:22:12.760278", + "step": 2979, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:12.791212", + "step": 2979, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000645567080937326, + "timestamp": "2025-09-10 02:22:12.819769", + "step": 2980, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:22:12.853166", + "step": 2980, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005663609481416643, + "timestamp": "2025-09-10 02:22:12.855008", + "step": 2981, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:12.885874", + "step": 2981, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03028297796845436, + "timestamp": "2025-09-10 02:22:12.892771", + "step": 2982, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:12.922939", + "step": 2982, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005917864036746323, + "timestamp": "2025-09-10 02:22:12.926940", + "step": 2983, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:12.959502", + "step": 2983, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001026555197313428, + "timestamp": "2025-09-10 02:22:12.988143", + "step": 2984, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:13.028026", + "step": 2984, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009340193355455995, + "timestamp": "2025-09-10 02:22:13.035431", + "step": 2985, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:13.072083", + "step": 2985, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008202405297197402, + "timestamp": "2025-09-10 02:22:13.079195", + "step": 2986, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:13.116266", + "step": 2986, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028560981154441833, + "timestamp": "2025-09-10 02:22:13.128338", + "step": 2987, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:13.169528", + "step": 2987, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00015045542386360466, + "timestamp": "2025-09-10 02:22:13.197543", + "step": 2988, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:13.229385", + "step": 2988, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005772449658252299, + "timestamp": "2025-09-10 02:22:13.236140", + "step": 2989, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:13.269514", + "step": 2989, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008060900145210326, + "timestamp": "2025-09-10 02:22:13.277134", + "step": 2990, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:13.311834", + "step": 2990, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006533232517540455, + "timestamp": "2025-09-10 02:22:13.319297", + "step": 2991, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:13.354743", + "step": 2991, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0189223550260067, + "timestamp": "2025-09-10 02:22:13.382512", + "step": 2992, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:13.414356", + "step": 2992, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038932212628424168, + "timestamp": "2025-09-10 02:22:13.424102", + "step": 2993, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:13.464029", + "step": 2993, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023500225506722927, + "timestamp": "2025-09-10 02:22:13.471545", + "step": 2994, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:22:13.519871", + "step": 2994, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029731089249253273, + "timestamp": "2025-09-10 02:22:13.522461", + "step": 2995, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:13.556026", + "step": 2995, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00560992443934083, + "timestamp": "2025-09-10 02:22:13.583643", + "step": 2996, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:13.614809", + "step": 2996, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013980664080008864, + "timestamp": "2025-09-10 02:22:13.625149", + "step": 2997, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:13.657053", + "step": 2997, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007363726384937763, + "timestamp": "2025-09-10 02:22:13.669642", + "step": 2998, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:13.704091", + "step": 2998, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005370273254811764, + "timestamp": "2025-09-10 02:22:13.711107", + "step": 2999, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:13.743172", + "step": 2999, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008254798129200935, + "timestamp": "2025-09-10 02:22:13.771504", + "step": 3000, + "epoch": 2 + }, + { + "type": "info", + "content": "Checkpoint saved at step 3000", + "timestamp": "2025-09-10 02:22:18.507756", + "step": 3000, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:18.561073", + "step": 3000, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013195289298892021, + "timestamp": "2025-09-10 02:22:18.568642", + "step": 3001, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:18.605389", + "step": 3001, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00990669522434473, + "timestamp": "2025-09-10 02:22:18.609221", + "step": 3002, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:18.644904", + "step": 3002, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004560007713735104, + "timestamp": "2025-09-10 02:22:18.649210", + "step": 3003, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:18.687567", + "step": 3003, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024112870451062918, + "timestamp": "2025-09-10 02:22:18.713265", + "step": 3004, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:18.749859", + "step": 3004, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007551188464276493, + "timestamp": "2025-09-10 02:22:18.754752", + "step": 3005, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:18.786814", + "step": 3005, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002144659374607727, + "timestamp": "2025-09-10 02:22:18.793390", + "step": 3006, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:22:18.831745", + "step": 3006, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013251977507025003, + "timestamp": "2025-09-10 02:22:18.847306", + "step": 3007, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:18.879089", + "step": 3007, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00027837217203341424, + "timestamp": "2025-09-10 02:22:18.906511", + "step": 3008, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:18.939130", + "step": 3008, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012016237014904618, + "timestamp": "2025-09-10 02:22:18.944273", + "step": 3009, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:22:18.982271", + "step": 3009, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016353409737348557, + "timestamp": "2025-09-10 02:22:18.996050", + "step": 3010, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:19.034920", + "step": 3010, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025526031851768494, + "timestamp": "2025-09-10 02:22:19.041451", + "step": 3011, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:19.077655", + "step": 3011, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007432910148054361, + "timestamp": "2025-09-10 02:22:19.108392", + "step": 3012, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:19.146844", + "step": 3012, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007051877328194678, + "timestamp": "2025-09-10 02:22:19.155911", + "step": 3013, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:19.193294", + "step": 3013, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010938274674117565, + "timestamp": "2025-09-10 02:22:19.205424", + "step": 3014, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:19.241698", + "step": 3014, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03111579827964306, + "timestamp": "2025-09-10 02:22:19.248729", + "step": 3015, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:19.280541", + "step": 3015, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008617566898465157, + "timestamp": "2025-09-10 02:22:19.308351", + "step": 3016, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:19.341108", + "step": 3016, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014785193838179111, + "timestamp": "2025-09-10 02:22:19.346261", + "step": 3017, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:19.379772", + "step": 3017, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002250525401905179, + "timestamp": "2025-09-10 02:22:19.386866", + "step": 3018, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:19.418987", + "step": 3018, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003496368881314993, + "timestamp": "2025-09-10 02:22:19.426212", + "step": 3019, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:19.457212", + "step": 3019, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003628613776527345, + "timestamp": "2025-09-10 02:22:19.485625", + "step": 3020, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:19.521575", + "step": 3020, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0066203526221215725, + "timestamp": "2025-09-10 02:22:19.530594", + "step": 3021, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:19.563427", + "step": 3021, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00015238435298670083, + "timestamp": "2025-09-10 02:22:19.570361", + "step": 3022, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:19.601647", + "step": 3022, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003497667144984007, + "timestamp": "2025-09-10 02:22:19.608232", + "step": 3023, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:19.639837", + "step": 3023, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04256868362426758, + "timestamp": "2025-09-10 02:22:19.671190", + "step": 3024, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:19.703218", + "step": 3024, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011781870853155851, + "timestamp": "2025-09-10 02:22:19.705106", + "step": 3025, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:19.736844", + "step": 3025, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00529795978218317, + "timestamp": "2025-09-10 02:22:19.743694", + "step": 3026, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:19.774776", + "step": 3026, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011905157007277012, + "timestamp": "2025-09-10 02:22:19.782323", + "step": 3027, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:19.814092", + "step": 3027, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007259399862959981, + "timestamp": "2025-09-10 02:22:19.842214", + "step": 3028, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:19.873893", + "step": 3028, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00030881358543410897, + "timestamp": "2025-09-10 02:22:19.881354", + "step": 3029, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:19.913353", + "step": 3029, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02642730250954628, + "timestamp": "2025-09-10 02:22:19.920234", + "step": 3030, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:22:19.961020", + "step": 3030, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.062217261642217636, + "timestamp": "2025-09-10 02:22:19.977252", + "step": 3031, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:20.010741", + "step": 3031, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010299092158675194, + "timestamp": "2025-09-10 02:22:20.038290", + "step": 3032, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:22:20.069983", + "step": 3032, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013357808347791433, + "timestamp": "2025-09-10 02:22:20.072354", + "step": 3033, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:20.104344", + "step": 3033, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00296528497710824, + "timestamp": "2025-09-10 02:22:20.111120", + "step": 3034, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:20.142696", + "step": 3034, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.037451110780239105, + "timestamp": "2025-09-10 02:22:20.149596", + "step": 3035, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:20.182290", + "step": 3035, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031133827287703753, + "timestamp": "2025-09-10 02:22:20.210302", + "step": 3036, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:22:20.242675", + "step": 3036, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0067618959583342075, + "timestamp": "2025-09-10 02:22:20.255690", + "step": 3037, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:20.287885", + "step": 3037, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028574629686772823, + "timestamp": "2025-09-10 02:22:20.291785", + "step": 3038, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:20.322595", + "step": 3038, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004291358927730471, + "timestamp": "2025-09-10 02:22:20.329540", + "step": 3039, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:20.360367", + "step": 3039, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04179126024246216, + "timestamp": "2025-09-10 02:22:20.388995", + "step": 3040, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:20.419723", + "step": 3040, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003463194938376546, + "timestamp": "2025-09-10 02:22:20.424416", + "step": 3041, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:22:20.462606", + "step": 3041, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04269900918006897, + "timestamp": "2025-09-10 02:22:20.478289", + "step": 3042, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:20.510403", + "step": 3042, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008046741597354412, + "timestamp": "2025-09-10 02:22:20.517900", + "step": 3043, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:20.548962", + "step": 3043, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00842567440122366, + "timestamp": "2025-09-10 02:22:20.576760", + "step": 3044, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:20.608236", + "step": 3044, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003832954214885831, + "timestamp": "2025-09-10 02:22:20.612900", + "step": 3045, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:20.643437", + "step": 3045, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032644220627844334, + "timestamp": "2025-09-10 02:22:20.650686", + "step": 3046, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:20.682397", + "step": 3046, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000450856052339077, + "timestamp": "2025-09-10 02:22:20.694639", + "step": 3047, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:20.725556", + "step": 3047, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005584734957665205, + "timestamp": "2025-09-10 02:22:20.753421", + "step": 3048, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:20.784286", + "step": 3048, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001507714157924056, + "timestamp": "2025-09-10 02:22:20.788909", + "step": 3049, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:20.819348", + "step": 3049, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006397690158337355, + "timestamp": "2025-09-10 02:22:20.826389", + "step": 3050, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:20.858993", + "step": 3050, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005857815849594772, + "timestamp": "2025-09-10 02:22:20.866794", + "step": 3051, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:20.897758", + "step": 3051, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018613949650898576, + "timestamp": "2025-09-10 02:22:20.925941", + "step": 3052, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:22:20.963332", + "step": 3052, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009978823363780975, + "timestamp": "2025-09-10 02:22:20.979018", + "step": 3053, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:21.010775", + "step": 3053, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00379360793158412, + "timestamp": "2025-09-10 02:22:21.021623", + "step": 3054, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:21.053164", + "step": 3054, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018329472513869405, + "timestamp": "2025-09-10 02:22:21.060677", + "step": 3055, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:21.091584", + "step": 3055, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014588729478418827, + "timestamp": "2025-09-10 02:22:21.119910", + "step": 3056, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:21.150206", + "step": 3056, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.032337453216314316, + "timestamp": "2025-09-10 02:22:21.154756", + "step": 3057, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:21.187083", + "step": 3057, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008836713968776166, + "timestamp": "2025-09-10 02:22:21.199654", + "step": 3058, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:21.231554", + "step": 3058, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018349305028095841, + "timestamp": "2025-09-10 02:22:21.238523", + "step": 3059, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:21.271396", + "step": 3059, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006054178345948458, + "timestamp": "2025-09-10 02:22:21.303173", + "step": 3060, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:21.334981", + "step": 3060, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019653644412755966, + "timestamp": "2025-09-10 02:22:21.340260", + "step": 3061, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:21.372363", + "step": 3061, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006816718727350235, + "timestamp": "2025-09-10 02:22:21.384960", + "step": 3062, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:21.415562", + "step": 3062, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038755948189646006, + "timestamp": "2025-09-10 02:22:21.420127", + "step": 3063, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:21.450497", + "step": 3063, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013827037764713168, + "timestamp": "2025-09-10 02:22:21.483511", + "step": 3064, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:22:21.518804", + "step": 3064, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010334816761314869, + "timestamp": "2025-09-10 02:22:21.531426", + "step": 3065, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:21.564761", + "step": 3065, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011374552734196186, + "timestamp": "2025-09-10 02:22:21.572062", + "step": 3066, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:21.605520", + "step": 3066, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006082989741116762, + "timestamp": "2025-09-10 02:22:21.609560", + "step": 3067, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:22:21.645832", + "step": 3067, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014264583587646484, + "timestamp": "2025-09-10 02:22:21.680079", + "step": 3068, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:22:21.710880", + "step": 3068, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007734424900263548, + "timestamp": "2025-09-10 02:22:21.713191", + "step": 3069, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:21.744035", + "step": 3069, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017366407439112663, + "timestamp": "2025-09-10 02:22:21.754021", + "step": 3070, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:21.784755", + "step": 3070, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003733862191438675, + "timestamp": "2025-09-10 02:22:21.791537", + "step": 3071, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:21.822650", + "step": 3071, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025246471632272005, + "timestamp": "2025-09-10 02:22:21.850485", + "step": 3072, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:21.881411", + "step": 3072, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013509529642760754, + "timestamp": "2025-09-10 02:22:21.889415", + "step": 3073, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:22:21.924665", + "step": 3073, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00036960511351935565, + "timestamp": "2025-09-10 02:22:21.938376", + "step": 3074, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:21.973789", + "step": 3074, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023911669850349426, + "timestamp": "2025-09-10 02:22:21.984903", + "step": 3075, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:22:22.039465", + "step": 3075, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024533343967050314, + "timestamp": "2025-09-10 02:22:22.076033", + "step": 3076, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:22.113073", + "step": 3076, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010715676471590996, + "timestamp": "2025-09-10 02:22:22.121327", + "step": 3077, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:22.154654", + "step": 3077, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028496759478002787, + "timestamp": "2025-09-10 02:22:22.161892", + "step": 3078, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:22.200186", + "step": 3078, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009175264276564121, + "timestamp": "2025-09-10 02:22:22.211068", + "step": 3079, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:22.250256", + "step": 3079, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0040994067676365376, + "timestamp": "2025-09-10 02:22:22.278185", + "step": 3080, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:22.309139", + "step": 3080, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005873729009181261, + "timestamp": "2025-09-10 02:22:22.313893", + "step": 3081, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:22.345022", + "step": 3081, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004864770919084549, + "timestamp": "2025-09-10 02:22:22.349484", + "step": 3082, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:22.380778", + "step": 3082, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015795464860275388, + "timestamp": "2025-09-10 02:22:22.384862", + "step": 3083, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:22.417242", + "step": 3083, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015763568226248026, + "timestamp": "2025-09-10 02:22:22.442602", + "step": 3084, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:22.474120", + "step": 3084, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003129825461655855, + "timestamp": "2025-09-10 02:22:22.476517", + "step": 3085, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:22:22.514847", + "step": 3085, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005250798421911895, + "timestamp": "2025-09-10 02:22:22.530777", + "step": 3086, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:22.561937", + "step": 3086, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003833092050626874, + "timestamp": "2025-09-10 02:22:22.568814", + "step": 3087, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:22:33.022514", + "step": 3087, + "epoch": 2 + }, + { + "type": "pplx", + "content": 21153755.598216124, + "timestamp": "2025-09-10 02:22:33.029278", + "step": 3087, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:33.064348", + "step": 3087, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009679818176664412, + "timestamp": "2025-09-10 02:22:33.096215", + "step": 3088, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:33.136391", + "step": 3088, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006355203688144684, + "timestamp": "2025-09-10 02:22:33.141075", + "step": 3089, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:33.179377", + "step": 3089, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007879887707531452, + "timestamp": "2025-09-10 02:22:33.189546", + "step": 3090, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:33.227835", + "step": 3090, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011465544812381268, + "timestamp": "2025-09-10 02:22:33.233963", + "step": 3091, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:33.267909", + "step": 3091, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022248754277825356, + "timestamp": "2025-09-10 02:22:33.295620", + "step": 3092, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:22:33.328598", + "step": 3092, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025890696793794632, + "timestamp": "2025-09-10 02:22:33.340896", + "step": 3093, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:22:33.378892", + "step": 3093, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001566907623782754, + "timestamp": "2025-09-10 02:22:33.394508", + "step": 3094, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:33.427731", + "step": 3094, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018820820841938257, + "timestamp": "2025-09-10 02:22:33.435036", + "step": 3095, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:33.466008", + "step": 3095, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020010853186249733, + "timestamp": "2025-09-10 02:22:33.494127", + "step": 3096, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:22:33.525651", + "step": 3096, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019767414778470993, + "timestamp": "2025-09-10 02:22:33.528109", + "step": 3097, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:33.562476", + "step": 3097, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007151364348828793, + "timestamp": "2025-09-10 02:22:33.566486", + "step": 3098, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:33.599120", + "step": 3098, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023438245989382267, + "timestamp": "2025-09-10 02:22:33.605795", + "step": 3099, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:33.637566", + "step": 3099, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004773031105287373, + "timestamp": "2025-09-10 02:22:33.662185", + "step": 3100, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:33.693484", + "step": 3100, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004419370554387569, + "timestamp": "2025-09-10 02:22:33.695734", + "step": 3101, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:33.726823", + "step": 3101, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012314915657043457, + "timestamp": "2025-09-10 02:22:33.731053", + "step": 3102, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:33.765637", + "step": 3102, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007404958363622427, + "timestamp": "2025-09-10 02:22:33.777847", + "step": 3103, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:33.809233", + "step": 3103, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002475725719705224, + "timestamp": "2025-09-10 02:22:33.836868", + "step": 3104, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:33.869402", + "step": 3104, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024621224030852318, + "timestamp": "2025-09-10 02:22:33.876148", + "step": 3105, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:33.907176", + "step": 3105, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006465516518801451, + "timestamp": "2025-09-10 02:22:33.913998", + "step": 3106, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:22:33.947508", + "step": 3106, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004534423351287842, + "timestamp": "2025-09-10 02:22:33.960891", + "step": 3107, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:33.993962", + "step": 3107, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025858450680971146, + "timestamp": "2025-09-10 02:22:34.022012", + "step": 3108, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:34.053224", + "step": 3108, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024823250714689493, + "timestamp": "2025-09-10 02:22:34.057775", + "step": 3109, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:34.089247", + "step": 3109, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004186380188912153, + "timestamp": "2025-09-10 02:22:34.099417", + "step": 3110, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:34.131364", + "step": 3110, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007520094513893127, + "timestamp": "2025-09-10 02:22:34.141494", + "step": 3111, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:34.172972", + "step": 3111, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00030438616522587836, + "timestamp": "2025-09-10 02:22:34.201555", + "step": 3112, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:34.232666", + "step": 3112, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00505801709368825, + "timestamp": "2025-09-10 02:22:34.234997", + "step": 3113, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:34.266322", + "step": 3113, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030636286828666925, + "timestamp": "2025-09-10 02:22:34.273511", + "step": 3114, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:22:34.314929", + "step": 3114, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.039970513433218, + "timestamp": "2025-09-10 02:22:34.332216", + "step": 3115, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:34.364789", + "step": 3115, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012349791824817657, + "timestamp": "2025-09-10 02:22:34.397696", + "step": 3116, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:34.428527", + "step": 3116, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034343355800956488, + "timestamp": "2025-09-10 02:22:34.433349", + "step": 3117, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:34.463675", + "step": 3117, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011677572503685951, + "timestamp": "2025-09-10 02:22:34.470737", + "step": 3118, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:34.502989", + "step": 3118, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023600461427122355, + "timestamp": "2025-09-10 02:22:34.515486", + "step": 3119, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:22:34.550914", + "step": 3119, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012422216823324561, + "timestamp": "2025-09-10 02:22:34.585815", + "step": 3120, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:34.616858", + "step": 3120, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005536832381039858, + "timestamp": "2025-09-10 02:22:34.621216", + "step": 3121, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:34.651904", + "step": 3121, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003003006335347891, + "timestamp": "2025-09-10 02:22:34.658901", + "step": 3122, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:34.689307", + "step": 3122, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019031567499041557, + "timestamp": "2025-09-10 02:22:34.696479", + "step": 3123, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:22:34.734404", + "step": 3123, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015222270740196109, + "timestamp": "2025-09-10 02:22:34.770870", + "step": 3124, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:34.805056", + "step": 3124, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003712509525939822, + "timestamp": "2025-09-10 02:22:34.809662", + "step": 3125, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:22:34.840686", + "step": 3125, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011844148393720388, + "timestamp": "2025-09-10 02:22:34.843120", + "step": 3126, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:34.874283", + "step": 3126, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009678156115114689, + "timestamp": "2025-09-10 02:22:34.885863", + "step": 3127, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:34.917147", + "step": 3127, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001543865422718227, + "timestamp": "2025-09-10 02:22:34.945213", + "step": 3128, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:34.975336", + "step": 3128, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022532050497829914, + "timestamp": "2025-09-10 02:22:34.977838", + "step": 3129, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:35.009158", + "step": 3129, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002928555477410555, + "timestamp": "2025-09-10 02:22:35.021490", + "step": 3130, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:35.062819", + "step": 3130, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005914249341003597, + "timestamp": "2025-09-10 02:22:35.070289", + "step": 3131, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:35.115911", + "step": 3131, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007928982959128916, + "timestamp": "2025-09-10 02:22:35.144531", + "step": 3132, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:35.184890", + "step": 3132, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026510064490139484, + "timestamp": "2025-09-10 02:22:35.193172", + "step": 3133, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:35.234127", + "step": 3133, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004251073580235243, + "timestamp": "2025-09-10 02:22:35.241154", + "step": 3134, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:35.275464", + "step": 3134, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003887306433171034, + "timestamp": "2025-09-10 02:22:35.286274", + "step": 3135, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:35.319472", + "step": 3135, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008226651698350906, + "timestamp": "2025-09-10 02:22:35.347308", + "step": 3136, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:35.377446", + "step": 3136, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013611420057713985, + "timestamp": "2025-09-10 02:22:35.381890", + "step": 3137, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:35.414659", + "step": 3137, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004120334633626044, + "timestamp": "2025-09-10 02:22:35.421833", + "step": 3138, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:22:35.458490", + "step": 3138, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002287933137267828, + "timestamp": "2025-09-10 02:22:35.472204", + "step": 3139, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:35.506202", + "step": 3139, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017298969032708555, + "timestamp": "2025-09-10 02:22:35.533829", + "step": 3140, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:22:35.570686", + "step": 3140, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00024141445464920253, + "timestamp": "2025-09-10 02:22:35.586110", + "step": 3141, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:35.617668", + "step": 3141, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008984781452454627, + "timestamp": "2025-09-10 02:22:35.629397", + "step": 3142, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:22:35.669036", + "step": 3142, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010877908207476139, + "timestamp": "2025-09-10 02:22:35.684699", + "step": 3143, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:35.716267", + "step": 3143, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018387990072369576, + "timestamp": "2025-09-10 02:22:35.743904", + "step": 3144, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:22:35.777546", + "step": 3144, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019596496131271124, + "timestamp": "2025-09-10 02:22:35.790640", + "step": 3145, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:35.822410", + "step": 3145, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00041655570385046303, + "timestamp": "2025-09-10 02:22:35.829175", + "step": 3146, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:35.861912", + "step": 3146, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006447642226703465, + "timestamp": "2025-09-10 02:22:35.868664", + "step": 3147, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 15662185694400 + }, + "timestamp": "2025-09-10 02:22:35.915220", + "step": 3147, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0039002900011837482, + "timestamp": "2025-09-10 02:22:35.955086", + "step": 3148, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:35.987428", + "step": 3148, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022350053768604994, + "timestamp": "2025-09-10 02:22:35.992181", + "step": 3149, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:36.023626", + "step": 3149, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005409142933785915, + "timestamp": "2025-09-10 02:22:36.030476", + "step": 3150, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:36.062820", + "step": 3150, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011333615984767675, + "timestamp": "2025-09-10 02:22:36.070256", + "step": 3151, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:36.100823", + "step": 3151, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002690895344130695, + "timestamp": "2025-09-10 02:22:36.128651", + "step": 3152, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:22:36.161113", + "step": 3152, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017261261120438576, + "timestamp": "2025-09-10 02:22:36.173823", + "step": 3153, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:22:36.208430", + "step": 3153, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018687748815864325, + "timestamp": "2025-09-10 02:22:36.222132", + "step": 3154, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:36.255541", + "step": 3154, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018482712330296636, + "timestamp": "2025-09-10 02:22:36.262692", + "step": 3155, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:36.293994", + "step": 3155, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002486660669092089, + "timestamp": "2025-09-10 02:22:36.322466", + "step": 3156, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:22:36.353796", + "step": 3156, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037758280523121357, + "timestamp": "2025-09-10 02:22:36.355847", + "step": 3157, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:22:36.389754", + "step": 3157, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007112511666491628, + "timestamp": "2025-09-10 02:22:36.392450", + "step": 3158, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:36.423967", + "step": 3158, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000233599086641334, + "timestamp": "2025-09-10 02:22:36.436441", + "step": 3159, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:22:36.470084", + "step": 3159, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017964192375075072, + "timestamp": "2025-09-10 02:22:36.493663", + "step": 3160, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:22:36.526932", + "step": 3160, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004801702219992876, + "timestamp": "2025-09-10 02:22:36.540081", + "step": 3161, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:36.584674", + "step": 3161, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020199043676257133, + "timestamp": "2025-09-10 02:22:36.591734", + "step": 3162, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:36.625173", + "step": 3162, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010675977682694793, + "timestamp": "2025-09-10 02:22:36.636048", + "step": 3163, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:36.673457", + "step": 3163, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003448014031164348, + "timestamp": "2025-09-10 02:22:36.701119", + "step": 3164, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:36.734990", + "step": 3164, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00020471213792916387, + "timestamp": "2025-09-10 02:22:36.739144", + "step": 3165, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:36.787514", + "step": 3165, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005103556322865188, + "timestamp": "2025-09-10 02:22:36.794029", + "step": 3166, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:22:36.827768", + "step": 3166, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002306754468008876, + "timestamp": "2025-09-10 02:22:36.830155", + "step": 3167, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:36.863788", + "step": 3167, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009209056152030826, + "timestamp": "2025-09-10 02:22:36.891997", + "step": 3168, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:22:36.925434", + "step": 3168, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017630930233281106, + "timestamp": "2025-09-10 02:22:36.938464", + "step": 3169, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:36.969216", + "step": 3169, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032021531369537115, + "timestamp": "2025-09-10 02:22:36.976342", + "step": 3170, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:37.008954", + "step": 3170, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00039557431591674685, + "timestamp": "2025-09-10 02:22:37.013179", + "step": 3171, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:37.043748", + "step": 3171, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010184214916080236, + "timestamp": "2025-09-10 02:22:37.074953", + "step": 3172, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:37.105520", + "step": 3172, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00031294874497689307, + "timestamp": "2025-09-10 02:22:37.113472", + "step": 3173, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:22:37.149177", + "step": 3173, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008610020158812404, + "timestamp": "2025-09-10 02:22:37.162901", + "step": 3174, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:22:37.197818", + "step": 3174, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015279522631317377, + "timestamp": "2025-09-10 02:22:37.211216", + "step": 3175, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:37.242396", + "step": 3175, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010398059152066708, + "timestamp": "2025-09-10 02:22:37.273537", + "step": 3176, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:37.304358", + "step": 3176, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012553682317957282, + "timestamp": "2025-09-10 02:22:37.306608", + "step": 3177, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:37.337888", + "step": 3177, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010120292427018285, + "timestamp": "2025-09-10 02:22:37.344761", + "step": 3178, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:37.375351", + "step": 3178, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003787730587646365, + "timestamp": "2025-09-10 02:22:37.383017", + "step": 3179, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:22:37.413647", + "step": 3179, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013505556620657444, + "timestamp": "2025-09-10 02:22:37.437644", + "step": 3180, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:37.468195", + "step": 3180, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003877087030559778, + "timestamp": "2025-09-10 02:22:37.470460", + "step": 3181, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:37.501096", + "step": 3181, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00047005919623188674, + "timestamp": "2025-09-10 02:22:37.511281", + "step": 3182, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:37.541506", + "step": 3182, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027071668300777674, + "timestamp": "2025-09-10 02:22:37.545685", + "step": 3183, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:37.581163", + "step": 3183, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015775591600686312, + "timestamp": "2025-09-10 02:22:37.606764", + "step": 3184, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:37.637287", + "step": 3184, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001980002736672759, + "timestamp": "2025-09-10 02:22:37.642280", + "step": 3185, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:37.673723", + "step": 3185, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013841536128893495, + "timestamp": "2025-09-10 02:22:37.685883", + "step": 3186, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:37.718254", + "step": 3186, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009568403474986553, + "timestamp": "2025-09-10 02:22:37.725579", + "step": 3187, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:37.757250", + "step": 3187, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0486009381711483, + "timestamp": "2025-09-10 02:22:37.785068", + "step": 3188, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:37.817158", + "step": 3188, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008468760177493095, + "timestamp": "2025-09-10 02:22:37.826681", + "step": 3189, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:37.858796", + "step": 3189, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004440871067345142, + "timestamp": "2025-09-10 02:22:37.868534", + "step": 3190, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:37.899339", + "step": 3190, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.039386093616485596, + "timestamp": "2025-09-10 02:22:37.906455", + "step": 3191, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:37.937573", + "step": 3191, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003340385155752301, + "timestamp": "2025-09-10 02:22:37.965889", + "step": 3192, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:37.997269", + "step": 3192, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004237064626067877, + "timestamp": "2025-09-10 02:22:38.002480", + "step": 3193, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:38.034109", + "step": 3193, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010632021352648735, + "timestamp": "2025-09-10 02:22:38.045869", + "step": 3194, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:22:38.084477", + "step": 3194, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007325800834223628, + "timestamp": "2025-09-10 02:22:38.100425", + "step": 3195, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:38.131176", + "step": 3195, + "epoch": 2 + }, + { + "type": "loss", + "content": 5.4420535889221355e-05, + "timestamp": "2025-09-10 02:22:38.156710", + "step": 3196, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:38.189240", + "step": 3196, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000553667254280299, + "timestamp": "2025-09-10 02:22:38.193935", + "step": 3197, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:22:38.235326", + "step": 3197, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02702312171459198, + "timestamp": "2025-09-10 02:22:38.252626", + "step": 3198, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:38.283741", + "step": 3198, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000169062870554626, + "timestamp": "2025-09-10 02:22:38.290898", + "step": 3199, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:22:38.322708", + "step": 3199, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018326956778764725, + "timestamp": "2025-09-10 02:22:38.346260", + "step": 3200, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:38.377001", + "step": 3200, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00025423362967558205, + "timestamp": "2025-09-10 02:22:38.382396", + "step": 3201, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:22:38.422092", + "step": 3201, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00021461385767906904, + "timestamp": "2025-09-10 02:22:38.438228", + "step": 3202, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:22:38.473102", + "step": 3202, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002993656671606004, + "timestamp": "2025-09-10 02:22:38.486818", + "step": 3203, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:22:38.522162", + "step": 3203, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004899102495983243, + "timestamp": "2025-09-10 02:22:38.557079", + "step": 3204, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:38.587494", + "step": 3204, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012291016755625606, + "timestamp": "2025-09-10 02:22:38.589920", + "step": 3205, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:38.620619", + "step": 3205, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01229874137789011, + "timestamp": "2025-09-10 02:22:38.624935", + "step": 3206, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:38.656016", + "step": 3206, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004454140434972942, + "timestamp": "2025-09-10 02:22:38.666814", + "step": 3207, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:38.707752", + "step": 3207, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00010696732351789251, + "timestamp": "2025-09-10 02:22:38.735577", + "step": 3208, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:38.766238", + "step": 3208, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006230357685126364, + "timestamp": "2025-09-10 02:22:38.774174", + "step": 3209, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:22:38.816179", + "step": 3209, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018624786753207445, + "timestamp": "2025-09-10 02:22:38.833519", + "step": 3210, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:22:38.872502", + "step": 3210, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004688594490289688, + "timestamp": "2025-09-10 02:22:38.888433", + "step": 3211, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:38.920488", + "step": 3211, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005589794600382447, + "timestamp": "2025-09-10 02:22:38.947973", + "step": 3212, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:38.980672", + "step": 3212, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001154548255726695, + "timestamp": "2025-09-10 02:22:38.988108", + "step": 3213, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:39.018998", + "step": 3213, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023087533190846443, + "timestamp": "2025-09-10 02:22:39.030941", + "step": 3214, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:39.064641", + "step": 3214, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000464627897599712, + "timestamp": "2025-09-10 02:22:39.071635", + "step": 3215, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:39.104297", + "step": 3215, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00038381904596462846, + "timestamp": "2025-09-10 02:22:39.132049", + "step": 3216, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:22:39.165104", + "step": 3216, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001965318340808153, + "timestamp": "2025-09-10 02:22:39.178420", + "step": 3217, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:22:39.211715", + "step": 3217, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007221942069008946, + "timestamp": "2025-09-10 02:22:39.225098", + "step": 3218, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:39.255813", + "step": 3218, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005813446477986872, + "timestamp": "2025-09-10 02:22:39.262704", + "step": 3219, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:39.293375", + "step": 3219, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018721247324720025, + "timestamp": "2025-09-10 02:22:39.318543", + "step": 3220, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:39.349428", + "step": 3220, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010139280930161476, + "timestamp": "2025-09-10 02:22:39.353948", + "step": 3221, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:39.384772", + "step": 3221, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010682783322408795, + "timestamp": "2025-09-10 02:22:39.395352", + "step": 3222, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:39.427058", + "step": 3222, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014338502660393715, + "timestamp": "2025-09-10 02:22:39.433513", + "step": 3223, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:39.465088", + "step": 3223, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01097325049340725, + "timestamp": "2025-09-10 02:22:39.493428", + "step": 3224, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:39.525266", + "step": 3224, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00029258467839099467, + "timestamp": "2025-09-10 02:22:39.529639", + "step": 3225, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:39.561053", + "step": 3225, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00042427852167747915, + "timestamp": "2025-09-10 02:22:39.567975", + "step": 3226, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:39.600915", + "step": 3226, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023932938929647207, + "timestamp": "2025-09-10 02:22:39.605326", + "step": 3227, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:39.637730", + "step": 3227, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0120490537956357, + "timestamp": "2025-09-10 02:22:39.666437", + "step": 3228, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:22:39.698073", + "step": 3228, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008032119949348271, + "timestamp": "2025-09-10 02:22:39.700494", + "step": 3229, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:39.732562", + "step": 3229, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006666731787845492, + "timestamp": "2025-09-10 02:22:39.739746", + "step": 3230, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:22:39.774952", + "step": 3230, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01671520434319973, + "timestamp": "2025-09-10 02:22:39.788296", + "step": 3231, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:39.839699", + "step": 3231, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014305815566331148, + "timestamp": "2025-09-10 02:22:39.867652", + "step": 3232, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:39.899367", + "step": 3232, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004569535667542368, + "timestamp": "2025-09-10 02:22:39.904099", + "step": 3233, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:39.934344", + "step": 3233, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00045302906073629856, + "timestamp": "2025-09-10 02:22:39.938752", + "step": 3234, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:22:49.945100", + "step": 3234, + "epoch": 2 + }, + { + "type": "pplx", + "content": 23502743.08364132, + "timestamp": "2025-09-10 02:22:49.948126", + "step": 3234, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:49.978817", + "step": 3234, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001856299233622849, + "timestamp": "2025-09-10 02:22:49.982354", + "step": 3235, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:50.013341", + "step": 3235, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000491947284899652, + "timestamp": "2025-09-10 02:22:50.043579", + "step": 3236, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:50.073820", + "step": 3236, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004294110112823546, + "timestamp": "2025-09-10 02:22:50.078443", + "step": 3237, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:50.108498", + "step": 3237, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006372739444486797, + "timestamp": "2025-09-10 02:22:50.121008", + "step": 3238, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:50.152654", + "step": 3238, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037969518452882767, + "timestamp": "2025-09-10 02:22:50.163174", + "step": 3239, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:50.193750", + "step": 3239, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004036373575218022, + "timestamp": "2025-09-10 02:22:50.224719", + "step": 3240, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:50.254784", + "step": 3240, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003846238541882485, + "timestamp": "2025-09-10 02:22:50.259442", + "step": 3241, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:50.289815", + "step": 3241, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02323022112250328, + "timestamp": "2025-09-10 02:22:50.300221", + "step": 3242, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:50.331983", + "step": 3242, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003217768098693341, + "timestamp": "2025-09-10 02:22:50.339433", + "step": 3243, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 784 + ], + "flops": 23255845310656 + }, + "timestamp": "2025-09-10 02:22:50.404351", + "step": 3243, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018644100055098534, + "timestamp": "2025-09-10 02:22:50.452450", + "step": 3244, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:50.483616", + "step": 3244, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001525462488643825, + "timestamp": "2025-09-10 02:22:50.493622", + "step": 3245, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:50.528687", + "step": 3245, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013417869340628386, + "timestamp": "2025-09-10 02:22:50.532530", + "step": 3246, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:50.563727", + "step": 3246, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008608666248619556, + "timestamp": "2025-09-10 02:22:50.574306", + "step": 3247, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:50.605993", + "step": 3247, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00041623544530011714, + "timestamp": "2025-09-10 02:22:50.636683", + "step": 3248, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:22:50.672229", + "step": 3248, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026598642580211163, + "timestamp": "2025-09-10 02:22:50.688083", + "step": 3249, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:22:50.722087", + "step": 3249, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033846586011350155, + "timestamp": "2025-09-10 02:22:50.724450", + "step": 3250, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:50.760297", + "step": 3250, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00024376294459216297, + "timestamp": "2025-09-10 02:22:50.766874", + "step": 3251, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:50.797846", + "step": 3251, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008604326867498457, + "timestamp": "2025-09-10 02:22:50.826404", + "step": 3252, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:50.859109", + "step": 3252, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002787252014968544, + "timestamp": "2025-09-10 02:22:50.861302", + "step": 3253, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:50.891014", + "step": 3253, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013136464403942227, + "timestamp": "2025-09-10 02:22:50.896621", + "step": 3254, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:50.929511", + "step": 3254, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002793243620544672, + "timestamp": "2025-09-10 02:22:50.935575", + "step": 3255, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:50.968465", + "step": 3255, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005823360406793654, + "timestamp": "2025-09-10 02:22:50.996851", + "step": 3256, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:51.028652", + "step": 3256, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009857832454144955, + "timestamp": "2025-09-10 02:22:51.033616", + "step": 3257, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:51.069361", + "step": 3257, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003967168158851564, + "timestamp": "2025-09-10 02:22:51.073645", + "step": 3258, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:51.106717", + "step": 3258, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002505451557226479, + "timestamp": "2025-09-10 02:22:51.114402", + "step": 3259, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:51.148211", + "step": 3259, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007157556829042733, + "timestamp": "2025-09-10 02:22:51.175925", + "step": 3260, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:22:51.216586", + "step": 3260, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00013806803326588124, + "timestamp": "2025-09-10 02:22:51.231738", + "step": 3261, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:51.268896", + "step": 3261, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00021779598318971694, + "timestamp": "2025-09-10 02:22:51.278799", + "step": 3262, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:51.316727", + "step": 3262, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008616751991212368, + "timestamp": "2025-09-10 02:22:51.320862", + "step": 3263, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 14712978242368 + }, + "timestamp": "2025-09-10 02:22:51.367488", + "step": 3263, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011451850878074765, + "timestamp": "2025-09-10 02:22:51.406167", + "step": 3264, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:22:51.442444", + "step": 3264, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010292161023244262, + "timestamp": "2025-09-10 02:22:51.455509", + "step": 3265, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:51.502272", + "step": 3265, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004708148539066315, + "timestamp": "2025-09-10 02:22:51.506216", + "step": 3266, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:51.540036", + "step": 3266, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003200080245733261, + "timestamp": "2025-09-10 02:22:51.547440", + "step": 3267, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:51.597175", + "step": 3267, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003564673534128815, + "timestamp": "2025-09-10 02:22:51.628072", + "step": 3268, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:51.660155", + "step": 3268, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001860837102867663, + "timestamp": "2025-09-10 02:22:51.664785", + "step": 3269, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:51.698093", + "step": 3269, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005392608582042158, + "timestamp": "2025-09-10 02:22:51.703230", + "step": 3270, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:51.742970", + "step": 3270, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032175458036363125, + "timestamp": "2025-09-10 02:22:51.750416", + "step": 3271, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:51.783389", + "step": 3271, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009787830524146557, + "timestamp": "2025-09-10 02:22:51.811842", + "step": 3272, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:51.842319", + "step": 3272, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008516389061696827, + "timestamp": "2025-09-10 02:22:51.844388", + "step": 3273, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:51.878358", + "step": 3273, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036891864147037268, + "timestamp": "2025-09-10 02:22:51.882661", + "step": 3274, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:51.916027", + "step": 3274, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00042585088522173464, + "timestamp": "2025-09-10 02:22:51.924005", + "step": 3275, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:51.954832", + "step": 3275, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004672827199101448, + "timestamp": "2025-09-10 02:22:51.983679", + "step": 3276, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:52.015837", + "step": 3276, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00011190387886017561, + "timestamp": "2025-09-10 02:22:52.020512", + "step": 3277, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:52.054019", + "step": 3277, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003633495362009853, + "timestamp": "2025-09-10 02:22:52.060849", + "step": 3278, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:52.094823", + "step": 3278, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030928533524274826, + "timestamp": "2025-09-10 02:22:52.107417", + "step": 3279, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:52.140009", + "step": 3279, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003447837952990085, + "timestamp": "2025-09-10 02:22:52.172284", + "step": 3280, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:52.207809", + "step": 3280, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003286560531705618, + "timestamp": "2025-09-10 02:22:52.210106", + "step": 3281, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:52.240739", + "step": 3281, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026785004884004593, + "timestamp": "2025-09-10 02:22:52.251175", + "step": 3282, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:22:52.287644", + "step": 3282, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004041456617414951, + "timestamp": "2025-09-10 02:22:52.301581", + "step": 3283, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:52.333684", + "step": 3283, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004617027007043362, + "timestamp": "2025-09-10 02:22:52.361945", + "step": 3284, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:52.393199", + "step": 3284, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017622795421630144, + "timestamp": "2025-09-10 02:22:52.398639", + "step": 3285, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:52.429068", + "step": 3285, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006238414789550006, + "timestamp": "2025-09-10 02:22:52.439567", + "step": 3286, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:52.470208", + "step": 3286, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005539960693567991, + "timestamp": "2025-09-10 02:22:52.474511", + "step": 3287, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:52.504593", + "step": 3287, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032464484684169292, + "timestamp": "2025-09-10 02:22:52.533337", + "step": 3288, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:52.563908", + "step": 3288, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018969980301335454, + "timestamp": "2025-09-10 02:22:52.572641", + "step": 3289, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:52.603471", + "step": 3289, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011059996904805303, + "timestamp": "2025-09-10 02:22:52.607788", + "step": 3290, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:52.639390", + "step": 3290, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009358406998217106, + "timestamp": "2025-09-10 02:22:52.651643", + "step": 3291, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:52.683118", + "step": 3291, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013368077343329787, + "timestamp": "2025-09-10 02:22:52.711542", + "step": 3292, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:52.742817", + "step": 3292, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013290916569530964, + "timestamp": "2025-09-10 02:22:52.744954", + "step": 3293, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:52.776523", + "step": 3293, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004001455847173929, + "timestamp": "2025-09-10 02:22:52.787595", + "step": 3294, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:52.818335", + "step": 3294, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00019497517496347427, + "timestamp": "2025-09-10 02:22:52.822799", + "step": 3295, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:52.855538", + "step": 3295, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005138739361427724, + "timestamp": "2025-09-10 02:22:52.883393", + "step": 3296, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:52.914958", + "step": 3296, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008893569000065327, + "timestamp": "2025-09-10 02:22:52.920142", + "step": 3297, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:22:52.959182", + "step": 3297, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008259877795353532, + "timestamp": "2025-09-10 02:22:52.975036", + "step": 3298, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:53.005523", + "step": 3298, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00016621073882561177, + "timestamp": "2025-09-10 02:22:53.013103", + "step": 3299, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:53.046542", + "step": 3299, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002620183164253831, + "timestamp": "2025-09-10 02:22:53.075158", + "step": 3300, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:53.105095", + "step": 3300, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029605585150420666, + "timestamp": "2025-09-10 02:22:53.107245", + "step": 3301, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:53.137003", + "step": 3301, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031393535900861025, + "timestamp": "2025-09-10 02:22:53.143954", + "step": 3302, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:53.174948", + "step": 3302, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002589694457128644, + "timestamp": "2025-09-10 02:22:53.182628", + "step": 3303, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:53.212926", + "step": 3303, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028403718024492264, + "timestamp": "2025-09-10 02:22:53.241438", + "step": 3304, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:53.271110", + "step": 3304, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000695100927259773, + "timestamp": "2025-09-10 02:22:53.275923", + "step": 3305, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:53.306170", + "step": 3305, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012687050737440586, + "timestamp": "2025-09-10 02:22:53.313990", + "step": 3306, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:53.343830", + "step": 3306, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03439050540328026, + "timestamp": "2025-09-10 02:22:53.350885", + "step": 3307, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:53.381182", + "step": 3307, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02955719642341137, + "timestamp": "2025-09-10 02:22:53.409822", + "step": 3308, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:53.441083", + "step": 3308, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008426779531873763, + "timestamp": "2025-09-10 02:22:53.449713", + "step": 3309, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:53.480771", + "step": 3309, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014351406134665012, + "timestamp": "2025-09-10 02:22:53.488153", + "step": 3310, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:53.519052", + "step": 3310, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006407542387023568, + "timestamp": "2025-09-10 02:22:53.529339", + "step": 3311, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:22:53.562755", + "step": 3311, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013966037891805172, + "timestamp": "2025-09-10 02:22:53.597025", + "step": 3312, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:53.627652", + "step": 3312, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011689442908391356, + "timestamp": "2025-09-10 02:22:53.635469", + "step": 3313, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:53.665966", + "step": 3313, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012120773317292333, + "timestamp": "2025-09-10 02:22:53.670069", + "step": 3314, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:53.701243", + "step": 3314, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011072760447859764, + "timestamp": "2025-09-10 02:22:53.705361", + "step": 3315, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:22:53.735480", + "step": 3315, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01113554835319519, + "timestamp": "2025-09-10 02:22:53.759070", + "step": 3316, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:53.789020", + "step": 3316, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005071196937933564, + "timestamp": "2025-09-10 02:22:53.791168", + "step": 3317, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:53.821706", + "step": 3317, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015270305797457695, + "timestamp": "2025-09-10 02:22:53.829189", + "step": 3318, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:22:53.863710", + "step": 3318, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003202088875696063, + "timestamp": "2025-09-10 02:22:53.877698", + "step": 3319, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:53.908537", + "step": 3319, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06514207273721695, + "timestamp": "2025-09-10 02:22:53.941350", + "step": 3320, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:22:53.973867", + "step": 3320, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00015472178347408772, + "timestamp": "2025-09-10 02:22:53.987005", + "step": 3321, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:54.017702", + "step": 3321, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023015392944216728, + "timestamp": "2025-09-10 02:22:54.024625", + "step": 3322, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:54.054972", + "step": 3322, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020949339494109154, + "timestamp": "2025-09-10 02:22:54.059051", + "step": 3323, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:54.089059", + "step": 3323, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001148775772890076, + "timestamp": "2025-09-10 02:22:54.116873", + "step": 3324, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:54.148560", + "step": 3324, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014125898014754057, + "timestamp": "2025-09-10 02:22:54.159196", + "step": 3325, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 576 + ], + "flops": 17085996872448 + }, + "timestamp": "2025-09-10 02:22:54.210053", + "step": 3325, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030126813799142838, + "timestamp": "2025-09-10 02:22:54.229548", + "step": 3326, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:54.271396", + "step": 3326, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005848866421729326, + "timestamp": "2025-09-10 02:22:54.281730", + "step": 3327, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:54.316933", + "step": 3327, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003858902200590819, + "timestamp": "2025-09-10 02:22:54.343095", + "step": 3328, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:54.377475", + "step": 3328, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009587566019035876, + "timestamp": "2025-09-10 02:22:54.382978", + "step": 3329, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:22:54.422472", + "step": 3329, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006397636607289314, + "timestamp": "2025-09-10 02:22:54.436228", + "step": 3330, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:22:54.466890", + "step": 3330, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004409528919495642, + "timestamp": "2025-09-10 02:22:54.470871", + "step": 3331, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:54.502681", + "step": 3331, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.06097668781876564, + "timestamp": "2025-09-10 02:22:54.527627", + "step": 3332, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:54.559173", + "step": 3332, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003068390360567719, + "timestamp": "2025-09-10 02:22:54.564065", + "step": 3333, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:54.599518", + "step": 3333, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04409003257751465, + "timestamp": "2025-09-10 02:22:54.603863", + "step": 3334, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:22:54.642867", + "step": 3334, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008692757226526737, + "timestamp": "2025-09-10 02:22:54.646571", + "step": 3335, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:54.678746", + "step": 3335, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.031811974942684174, + "timestamp": "2025-09-10 02:22:54.704331", + "step": 3336, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:54.736636", + "step": 3336, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01294754259288311, + "timestamp": "2025-09-10 02:22:54.744616", + "step": 3337, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:54.776905", + "step": 3337, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00024945108452811837, + "timestamp": "2025-09-10 02:22:54.784345", + "step": 3338, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:54.819269", + "step": 3338, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003305058693513274, + "timestamp": "2025-09-10 02:22:54.829892", + "step": 3339, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:54.860214", + "step": 3339, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017841076478362083, + "timestamp": "2025-09-10 02:22:54.885431", + "step": 3340, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:54.917395", + "step": 3340, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000721210555639118, + "timestamp": "2025-09-10 02:22:54.922181", + "step": 3341, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:22:54.954585", + "step": 3341, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028600546065717936, + "timestamp": "2025-09-10 02:22:54.956462", + "step": 3342, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:22:54.986851", + "step": 3342, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04634520411491394, + "timestamp": "2025-09-10 02:22:54.990315", + "step": 3343, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:55.020295", + "step": 3343, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012477157637476921, + "timestamp": "2025-09-10 02:22:55.048779", + "step": 3344, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:22:55.080169", + "step": 3344, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029694880358874798, + "timestamp": "2025-09-10 02:22:55.092968", + "step": 3345, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:55.123580", + "step": 3345, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003129235119558871, + "timestamp": "2025-09-10 02:22:55.127885", + "step": 3346, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:55.158608", + "step": 3346, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009798407554626465, + "timestamp": "2025-09-10 02:22:55.170856", + "step": 3347, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:55.201318", + "step": 3347, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0179388877004385, + "timestamp": "2025-09-10 02:22:55.228995", + "step": 3348, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:55.259186", + "step": 3348, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008463481441140175, + "timestamp": "2025-09-10 02:22:55.264411", + "step": 3349, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:55.295248", + "step": 3349, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007734844111837447, + "timestamp": "2025-09-10 02:22:55.302578", + "step": 3350, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:55.333607", + "step": 3350, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.07793903350830078, + "timestamp": "2025-09-10 02:22:55.337998", + "step": 3351, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:55.367868", + "step": 3351, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00045389196020551026, + "timestamp": "2025-09-10 02:22:55.396596", + "step": 3352, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:55.426948", + "step": 3352, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01835579052567482, + "timestamp": "2025-09-10 02:22:55.432266", + "step": 3353, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:55.462789", + "step": 3353, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028381929732859135, + "timestamp": "2025-09-10 02:22:55.474970", + "step": 3354, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:55.505419", + "step": 3354, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001101338886655867, + "timestamp": "2025-09-10 02:22:55.512718", + "step": 3355, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:55.542884", + "step": 3355, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006561249028891325, + "timestamp": "2025-09-10 02:22:55.571365", + "step": 3356, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:55.601655", + "step": 3356, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0058148703537881374, + "timestamp": "2025-09-10 02:22:55.612048", + "step": 3357, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:22:55.642061", + "step": 3357, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029688451904803514, + "timestamp": "2025-09-10 02:22:55.644840", + "step": 3358, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:22:55.675408", + "step": 3358, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01831236109137535, + "timestamp": "2025-09-10 02:22:55.687572", + "step": 3359, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:55.717698", + "step": 3359, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.040482617914676666, + "timestamp": "2025-09-10 02:22:55.742493", + "step": 3360, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:55.774544", + "step": 3360, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021457521244883537, + "timestamp": "2025-09-10 02:22:55.782464", + "step": 3361, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:55.813342", + "step": 3361, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000866633839905262, + "timestamp": "2025-09-10 02:22:55.823534", + "step": 3362, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:55.853993", + "step": 3362, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011205156333744526, + "timestamp": "2025-09-10 02:22:55.861347", + "step": 3363, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:55.891354", + "step": 3363, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007767033530399203, + "timestamp": "2025-09-10 02:22:55.916428", + "step": 3364, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:55.946368", + "step": 3364, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002113510388880968, + "timestamp": "2025-09-10 02:22:55.951161", + "step": 3365, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:22:55.983402", + "step": 3365, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01282955426722765, + "timestamp": "2025-09-10 02:22:55.990965", + "step": 3366, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:22:56.022097", + "step": 3366, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011836833320558071, + "timestamp": "2025-09-10 02:22:56.028672", + "step": 3367, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:22:56.059408", + "step": 3367, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010014675557613373, + "timestamp": "2025-09-10 02:22:56.090323", + "step": 3368, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:22:56.119616", + "step": 3368, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004872338380664587, + "timestamp": "2025-09-10 02:22:56.121441", + "step": 3369, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:56.152678", + "step": 3369, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007102675270289183, + "timestamp": "2025-09-10 02:22:56.156926", + "step": 3370, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:22:56.187059", + "step": 3370, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036141786258667707, + "timestamp": "2025-09-10 02:22:56.191742", + "step": 3371, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:56.222806", + "step": 3371, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022325122263282537, + "timestamp": "2025-09-10 02:22:56.254610", + "step": 3372, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:22:56.286385", + "step": 3372, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027134040370583534, + "timestamp": "2025-09-10 02:22:56.291972", + "step": 3373, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:56.322260", + "step": 3373, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011692258529365063, + "timestamp": "2025-09-10 02:22:56.329292", + "step": 3374, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:22:56.360309", + "step": 3374, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016718130791559815, + "timestamp": "2025-09-10 02:22:56.370979", + "step": 3375, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:22:56.408658", + "step": 3375, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012171820737421513, + "timestamp": "2025-09-10 02:22:56.445450", + "step": 3376, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:22:56.475527", + "step": 3376, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001996230101212859, + "timestamp": "2025-09-10 02:22:56.477359", + "step": 3377, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:22:56.506870", + "step": 3377, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00892335269600153, + "timestamp": "2025-09-10 02:22:56.509304", + "step": 3378, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:22:56.539112", + "step": 3378, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013574354350566864, + "timestamp": "2025-09-10 02:22:56.546166", + "step": 3379, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:22:56.577810", + "step": 3379, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010744870640337467, + "timestamp": "2025-09-10 02:22:56.611237", + "step": 3380, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:22:56.651979", + "step": 3380, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012002465315163136, + "timestamp": "2025-09-10 02:22:56.665283", + "step": 3381, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:23:06.754647", + "step": 3381, + "epoch": 2 + }, + { + "type": "pplx", + "content": 19503867.643994175, + "timestamp": "2025-09-10 02:23:06.757515", + "step": 3381, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:23:06.794649", + "step": 3381, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004555174149572849, + "timestamp": "2025-09-10 02:23:06.810538", + "step": 3382, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:06.845093", + "step": 3382, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012583248317241669, + "timestamp": "2025-09-10 02:23:06.858473", + "step": 3383, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:06.890181", + "step": 3383, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03392893821001053, + "timestamp": "2025-09-10 02:23:06.922817", + "step": 3384, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:23:06.957562", + "step": 3384, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004845472984015942, + "timestamp": "2025-09-10 02:23:06.970855", + "step": 3385, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:07.005311", + "step": 3385, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021397171076387167, + "timestamp": "2025-09-10 02:23:07.015713", + "step": 3386, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:07.050165", + "step": 3386, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012018001638352871, + "timestamp": "2025-09-10 02:23:07.059697", + "step": 3387, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:07.092906", + "step": 3387, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004577454179525375, + "timestamp": "2025-09-10 02:23:07.117881", + "step": 3388, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:23:07.149652", + "step": 3388, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010535781271755695, + "timestamp": "2025-09-10 02:23:07.151782", + "step": 3389, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:07.183243", + "step": 3389, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005816023796796799, + "timestamp": "2025-09-10 02:23:07.190017", + "step": 3390, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:23:07.221365", + "step": 3390, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00614953925833106, + "timestamp": "2025-09-10 02:23:07.223727", + "step": 3391, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:07.255452", + "step": 3391, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003558420343324542, + "timestamp": "2025-09-10 02:23:07.283727", + "step": 3392, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:07.314913", + "step": 3392, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005937974434345961, + "timestamp": "2025-09-10 02:23:07.319177", + "step": 3393, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:07.351187", + "step": 3393, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01408356986939907, + "timestamp": "2025-09-10 02:23:07.363532", + "step": 3394, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:07.394337", + "step": 3394, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014379842905327678, + "timestamp": "2025-09-10 02:23:07.398046", + "step": 3395, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:07.429030", + "step": 3395, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0067471894435584545, + "timestamp": "2025-09-10 02:23:07.454158", + "step": 3396, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:07.486942", + "step": 3396, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013919052667915821, + "timestamp": "2025-09-10 02:23:07.495675", + "step": 3397, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:23:07.530583", + "step": 3397, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027160770259797573, + "timestamp": "2025-09-10 02:23:07.544500", + "step": 3398, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:07.577910", + "step": 3398, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004795930348336697, + "timestamp": "2025-09-10 02:23:07.587618", + "step": 3399, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:07.619070", + "step": 3399, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005166996270418167, + "timestamp": "2025-09-10 02:23:07.647255", + "step": 3400, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:07.689699", + "step": 3400, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037923615891486406, + "timestamp": "2025-09-10 02:23:07.694920", + "step": 3401, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:07.725938", + "step": 3401, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003060044953599572, + "timestamp": "2025-09-10 02:23:07.737891", + "step": 3402, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:23:07.778578", + "step": 3402, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001733014010824263, + "timestamp": "2025-09-10 02:23:07.794729", + "step": 3403, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:07.825397", + "step": 3403, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021855314262211323, + "timestamp": "2025-09-10 02:23:07.853881", + "step": 3404, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:07.887128", + "step": 3404, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03904338553547859, + "timestamp": "2025-09-10 02:23:07.891696", + "step": 3405, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:07.922344", + "step": 3405, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007601436227560043, + "timestamp": "2025-09-10 02:23:07.934606", + "step": 3406, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:07.966596", + "step": 3406, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009114629589021206, + "timestamp": "2025-09-10 02:23:07.973986", + "step": 3407, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:08.005183", + "step": 3407, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021965792402625084, + "timestamp": "2025-09-10 02:23:08.036150", + "step": 3408, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:08.068219", + "step": 3408, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007908275350928307, + "timestamp": "2025-09-10 02:23:08.081050", + "step": 3409, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:08.112268", + "step": 3409, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009908582083880901, + "timestamp": "2025-09-10 02:23:08.116513", + "step": 3410, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:08.150016", + "step": 3410, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006652395240962505, + "timestamp": "2025-09-10 02:23:08.163360", + "step": 3411, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:08.195014", + "step": 3411, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005871969740837812, + "timestamp": "2025-09-10 02:23:08.225585", + "step": 3412, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:08.257697", + "step": 3412, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007932118140161037, + "timestamp": "2025-09-10 02:23:08.262776", + "step": 3413, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:08.294989", + "step": 3413, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011967500671744347, + "timestamp": "2025-09-10 02:23:08.302832", + "step": 3414, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:08.334066", + "step": 3414, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017909369198605418, + "timestamp": "2025-09-10 02:23:08.341717", + "step": 3415, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:08.372844", + "step": 3415, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007196805439889431, + "timestamp": "2025-09-10 02:23:08.401415", + "step": 3416, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:08.432415", + "step": 3416, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005088069010525942, + "timestamp": "2025-09-10 02:23:08.437571", + "step": 3417, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:08.468347", + "step": 3417, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011828926391899586, + "timestamp": "2025-09-10 02:23:08.472861", + "step": 3418, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:23:08.507358", + "step": 3418, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015730817103758454, + "timestamp": "2025-09-10 02:23:08.520988", + "step": 3419, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:08.553080", + "step": 3419, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010220969095826149, + "timestamp": "2025-09-10 02:23:08.583498", + "step": 3420, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:23:08.623942", + "step": 3420, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031598855275660753, + "timestamp": "2025-09-10 02:23:08.640953", + "step": 3421, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:08.672646", + "step": 3421, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028410514816641808, + "timestamp": "2025-09-10 02:23:08.679740", + "step": 3422, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:08.710963", + "step": 3422, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003635372733697295, + "timestamp": "2025-09-10 02:23:08.717809", + "step": 3423, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:08.748767", + "step": 3423, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010761960409581661, + "timestamp": "2025-09-10 02:23:08.776510", + "step": 3424, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:08.807771", + "step": 3424, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029311534017324448, + "timestamp": "2025-09-10 02:23:08.815772", + "step": 3425, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:08.846343", + "step": 3425, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005955227185040712, + "timestamp": "2025-09-10 02:23:08.853700", + "step": 3426, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:08.887227", + "step": 3426, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019082642393186688, + "timestamp": "2025-09-10 02:23:08.894193", + "step": 3427, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:08.927241", + "step": 3427, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.036622676998376846, + "timestamp": "2025-09-10 02:23:08.958448", + "step": 3428, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:08.991248", + "step": 3428, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016960853710770607, + "timestamp": "2025-09-10 02:23:08.996726", + "step": 3429, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:09.026889", + "step": 3429, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013002237305045128, + "timestamp": "2025-09-10 02:23:09.034596", + "step": 3430, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:09.065070", + "step": 3430, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008839133195579052, + "timestamp": "2025-09-10 02:23:09.071772", + "step": 3431, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:09.104799", + "step": 3431, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01928016170859337, + "timestamp": "2025-09-10 02:23:09.132528", + "step": 3432, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:09.163379", + "step": 3432, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017680247547104955, + "timestamp": "2025-09-10 02:23:09.165668", + "step": 3433, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:23:09.195755", + "step": 3433, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012528672814369202, + "timestamp": "2025-09-10 02:23:09.198349", + "step": 3434, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:09.228746", + "step": 3434, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0041799359023571014, + "timestamp": "2025-09-10 02:23:09.235932", + "step": 3435, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:09.266783", + "step": 3435, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00306068011559546, + "timestamp": "2025-09-10 02:23:09.295547", + "step": 3436, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:09.326486", + "step": 3436, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031013197731226683, + "timestamp": "2025-09-10 02:23:09.331936", + "step": 3437, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:09.362497", + "step": 3437, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001428862102329731, + "timestamp": "2025-09-10 02:23:09.369800", + "step": 3438, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:23:09.404365", + "step": 3438, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005541969439946115, + "timestamp": "2025-09-10 02:23:09.418183", + "step": 3439, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:09.449030", + "step": 3439, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004622712731361389, + "timestamp": "2025-09-10 02:23:09.478004", + "step": 3440, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:09.509849", + "step": 3440, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019380019512027502, + "timestamp": "2025-09-10 02:23:09.514406", + "step": 3441, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:09.545872", + "step": 3441, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003715726314112544, + "timestamp": "2025-09-10 02:23:09.556902", + "step": 3442, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:09.588546", + "step": 3442, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006264827214181423, + "timestamp": "2025-09-10 02:23:09.595030", + "step": 3443, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:09.627432", + "step": 3443, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004857002291828394, + "timestamp": "2025-09-10 02:23:09.658245", + "step": 3444, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:09.693492", + "step": 3444, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002044878900051117, + "timestamp": "2025-09-10 02:23:09.700332", + "step": 3445, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:09.733066", + "step": 3445, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04166651517152786, + "timestamp": "2025-09-10 02:23:09.739766", + "step": 3446, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:09.772160", + "step": 3446, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031065084040164948, + "timestamp": "2025-09-10 02:23:09.782139", + "step": 3447, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:09.816534", + "step": 3447, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012844757875427604, + "timestamp": "2025-09-10 02:23:09.843914", + "step": 3448, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:09.875547", + "step": 3448, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019296734826639295, + "timestamp": "2025-09-10 02:23:09.883584", + "step": 3449, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:09.916452", + "step": 3449, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020907416474074125, + "timestamp": "2025-09-10 02:23:09.927687", + "step": 3450, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:09.960487", + "step": 3450, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002724104793742299, + "timestamp": "2025-09-10 02:23:09.967106", + "step": 3451, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:09.999304", + "step": 3451, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012764266692101955, + "timestamp": "2025-09-10 02:23:10.029740", + "step": 3452, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:10.061699", + "step": 3452, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005641660653054714, + "timestamp": "2025-09-10 02:23:10.063917", + "step": 3453, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:10.096530", + "step": 3453, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003335190238431096, + "timestamp": "2025-09-10 02:23:10.102996", + "step": 3454, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:10.145804", + "step": 3454, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014257727889344096, + "timestamp": "2025-09-10 02:23:10.157428", + "step": 3455, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:23:10.196714", + "step": 3455, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005661274306476116, + "timestamp": "2025-09-10 02:23:10.233271", + "step": 3456, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:10.269812", + "step": 3456, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007868251414038241, + "timestamp": "2025-09-10 02:23:10.272408", + "step": 3457, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:10.311148", + "step": 3457, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019356502452865243, + "timestamp": "2025-09-10 02:23:10.318900", + "step": 3458, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:10.349817", + "step": 3458, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006856503430753946, + "timestamp": "2025-09-10 02:23:10.357731", + "step": 3459, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:10.394105", + "step": 3459, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010916010942310095, + "timestamp": "2025-09-10 02:23:10.428313", + "step": 3460, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:10.463463", + "step": 3460, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013903248123824596, + "timestamp": "2025-09-10 02:23:10.469270", + "step": 3461, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:10.502983", + "step": 3461, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002239000052213669, + "timestamp": "2025-09-10 02:23:10.506401", + "step": 3462, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:23:10.541230", + "step": 3462, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007613528869114816, + "timestamp": "2025-09-10 02:23:10.543512", + "step": 3463, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:23:10.581849", + "step": 3463, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012689571594819427, + "timestamp": "2025-09-10 02:23:10.616370", + "step": 3464, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:10.663739", + "step": 3464, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037759561091661453, + "timestamp": "2025-09-10 02:23:10.671022", + "step": 3465, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:10.714116", + "step": 3465, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006281206151470542, + "timestamp": "2025-09-10 02:23:10.719764", + "step": 3466, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:10.752091", + "step": 3466, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000767476565670222, + "timestamp": "2025-09-10 02:23:10.759522", + "step": 3467, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:10.793859", + "step": 3467, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000946753949392587, + "timestamp": "2025-09-10 02:23:10.819850", + "step": 3468, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:10.850932", + "step": 3468, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014056609943509102, + "timestamp": "2025-09-10 02:23:10.853224", + "step": 3469, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:10.886985", + "step": 3469, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004704651073552668, + "timestamp": "2025-09-10 02:23:10.894094", + "step": 3470, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:10.926723", + "step": 3470, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011611180379986763, + "timestamp": "2025-09-10 02:23:10.933919", + "step": 3471, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:10.972466", + "step": 3471, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004651397932320833, + "timestamp": "2025-09-10 02:23:11.000688", + "step": 3472, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:11.033209", + "step": 3472, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010988789144903421, + "timestamp": "2025-09-10 02:23:11.038778", + "step": 3473, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:11.069426", + "step": 3473, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001688135089352727, + "timestamp": "2025-09-10 02:23:11.073789", + "step": 3474, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:11.105988", + "step": 3474, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008201858960092068, + "timestamp": "2025-09-10 02:23:11.110507", + "step": 3475, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:11.142400", + "step": 3475, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017634114483371377, + "timestamp": "2025-09-10 02:23:11.167876", + "step": 3476, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:11.198653", + "step": 3476, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002664331579580903, + "timestamp": "2025-09-10 02:23:11.204161", + "step": 3477, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:11.235062", + "step": 3477, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006722270627506077, + "timestamp": "2025-09-10 02:23:11.242056", + "step": 3478, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:11.272417", + "step": 3478, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010332902893424034, + "timestamp": "2025-09-10 02:23:11.276778", + "step": 3479, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:11.308084", + "step": 3479, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014329560799524188, + "timestamp": "2025-09-10 02:23:11.337032", + "step": 3480, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:11.367640", + "step": 3480, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009547821246087551, + "timestamp": "2025-09-10 02:23:11.373296", + "step": 3481, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:11.406835", + "step": 3481, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015632616356015205, + "timestamp": "2025-09-10 02:23:11.420229", + "step": 3482, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:11.451294", + "step": 3482, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008822629461064935, + "timestamp": "2025-09-10 02:23:11.458362", + "step": 3483, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:11.507670", + "step": 3483, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.024470632895827293, + "timestamp": "2025-09-10 02:23:11.532646", + "step": 3484, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:11.563413", + "step": 3484, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002709366148337722, + "timestamp": "2025-09-10 02:23:11.568540", + "step": 3485, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:11.602144", + "step": 3485, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006141713238321245, + "timestamp": "2025-09-10 02:23:11.615536", + "step": 3486, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:11.646966", + "step": 3486, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00025127717526629567, + "timestamp": "2025-09-10 02:23:11.654566", + "step": 3487, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:11.689017", + "step": 3487, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025279794353991747, + "timestamp": "2025-09-10 02:23:11.720345", + "step": 3488, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:11.751908", + "step": 3488, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007817599689587951, + "timestamp": "2025-09-10 02:23:11.754063", + "step": 3489, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:11.785830", + "step": 3489, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025948244147002697, + "timestamp": "2025-09-10 02:23:11.792944", + "step": 3490, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:11.823703", + "step": 3490, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002477052912581712, + "timestamp": "2025-09-10 02:23:11.830840", + "step": 3491, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:11.861527", + "step": 3491, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020622704178094864, + "timestamp": "2025-09-10 02:23:11.889421", + "step": 3492, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:11.920080", + "step": 3492, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009450044599361718, + "timestamp": "2025-09-10 02:23:11.925297", + "step": 3493, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:23:11.963272", + "step": 3493, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004593479912728071, + "timestamp": "2025-09-10 02:23:11.978930", + "step": 3494, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:12.011166", + "step": 3494, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003686284471768886, + "timestamp": "2025-09-10 02:23:12.015801", + "step": 3495, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:23:12.051195", + "step": 3495, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008759652846492827, + "timestamp": "2025-09-10 02:23:12.085794", + "step": 3496, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:23:12.116178", + "step": 3496, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002225195523351431, + "timestamp": "2025-09-10 02:23:12.118332", + "step": 3497, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:12.148949", + "step": 3497, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005732810823246837, + "timestamp": "2025-09-10 02:23:12.156800", + "step": 3498, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:12.188215", + "step": 3498, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018855527741834521, + "timestamp": "2025-09-10 02:23:12.199185", + "step": 3499, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:12.230391", + "step": 3499, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00036894562072120607, + "timestamp": "2025-09-10 02:23:12.258639", + "step": 3500, + "epoch": 2 + }, + { + "type": "info", + "content": "Checkpoint saved at step 3500", + "timestamp": "2025-09-10 02:23:16.939336", + "step": 3500, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:16.972263", + "step": 3500, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015905782347545028, + "timestamp": "2025-09-10 02:23:16.976744", + "step": 3501, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:17.011135", + "step": 3501, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011544699082151055, + "timestamp": "2025-09-10 02:23:17.017976", + "step": 3502, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:23:17.049103", + "step": 3502, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00034944407525472343, + "timestamp": "2025-09-10 02:23:17.051563", + "step": 3503, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:17.081918", + "step": 3503, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00036820146488025784, + "timestamp": "2025-09-10 02:23:17.109738", + "step": 3504, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:17.142389", + "step": 3504, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00649291044101119, + "timestamp": "2025-09-10 02:23:17.144832", + "step": 3505, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:17.176811", + "step": 3505, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009862485341727734, + "timestamp": "2025-09-10 02:23:17.184533", + "step": 3506, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:17.215687", + "step": 3506, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013766074553132057, + "timestamp": "2025-09-10 02:23:17.223525", + "step": 3507, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:17.254771", + "step": 3507, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004568330943584442, + "timestamp": "2025-09-10 02:23:17.283323", + "step": 3508, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:17.318817", + "step": 3508, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012078741565346718, + "timestamp": "2025-09-10 02:23:17.328682", + "step": 3509, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:17.359784", + "step": 3509, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004991721361875534, + "timestamp": "2025-09-10 02:23:17.367115", + "step": 3510, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:17.397399", + "step": 3510, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006359159597195685, + "timestamp": "2025-09-10 02:23:17.404120", + "step": 3511, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:17.435258", + "step": 3511, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014153111260384321, + "timestamp": "2025-09-10 02:23:17.466397", + "step": 3512, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:17.498228", + "step": 3512, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020929204765707254, + "timestamp": "2025-09-10 02:23:17.503563", + "step": 3513, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:17.533849", + "step": 3513, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000537493615411222, + "timestamp": "2025-09-10 02:23:17.537962", + "step": 3514, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:17.568274", + "step": 3514, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007732919184491038, + "timestamp": "2025-09-10 02:23:17.575550", + "step": 3515, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:17.610571", + "step": 3515, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020853166934102774, + "timestamp": "2025-09-10 02:23:17.638308", + "step": 3516, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:17.670249", + "step": 3516, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008857056498527527, + "timestamp": "2025-09-10 02:23:17.674492", + "step": 3517, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:17.706110", + "step": 3517, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001030710176564753, + "timestamp": "2025-09-10 02:23:17.716250", + "step": 3518, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:17.751247", + "step": 3518, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005307839601300657, + "timestamp": "2025-09-10 02:23:17.759184", + "step": 3519, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:17.790289", + "step": 3519, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003780403407290578, + "timestamp": "2025-09-10 02:23:17.819065", + "step": 3520, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:17.849819", + "step": 3520, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005517909303307533, + "timestamp": "2025-09-10 02:23:17.852213", + "step": 3521, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:17.883587", + "step": 3521, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004757777787744999, + "timestamp": "2025-09-10 02:23:17.891136", + "step": 3522, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:17.922466", + "step": 3522, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000609197246376425, + "timestamp": "2025-09-10 02:23:17.927193", + "step": 3523, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:17.958742", + "step": 3523, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007339877774938941, + "timestamp": "2025-09-10 02:23:17.987436", + "step": 3524, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:18.020103", + "step": 3524, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018651520367711782, + "timestamp": "2025-09-10 02:23:18.028154", + "step": 3525, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:23:18.066961", + "step": 3525, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003974412567913532, + "timestamp": "2025-09-10 02:23:18.082846", + "step": 3526, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:18.114410", + "step": 3526, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008718844619579613, + "timestamp": "2025-09-10 02:23:18.118537", + "step": 3527, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:18.150124", + "step": 3527, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00030707629048265517, + "timestamp": "2025-09-10 02:23:18.178486", + "step": 3528, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:23:28.262537", + "step": 3528, + "epoch": 2 + }, + { + "type": "pplx", + "content": 22053610.470987573, + "timestamp": "2025-09-10 02:23:28.265279", + "step": 3528, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:28.295598", + "step": 3528, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00019998988136649132, + "timestamp": "2025-09-10 02:23:28.303559", + "step": 3529, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:28.335417", + "step": 3529, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013725848868489265, + "timestamp": "2025-09-10 02:23:28.345349", + "step": 3530, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:28.376647", + "step": 3530, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001963739050552249, + "timestamp": "2025-09-10 02:23:28.381047", + "step": 3531, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:23:28.419680", + "step": 3531, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01121476013213396, + "timestamp": "2025-09-10 02:23:28.456740", + "step": 3532, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:28.487527", + "step": 3532, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00019301848078612238, + "timestamp": "2025-09-10 02:23:28.496081", + "step": 3533, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:28.527158", + "step": 3533, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023806169629096985, + "timestamp": "2025-09-10 02:23:28.539780", + "step": 3534, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:28.570913", + "step": 3534, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010761814191937447, + "timestamp": "2025-09-10 02:23:28.575368", + "step": 3535, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:28.606462", + "step": 3535, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00018318326328881085, + "timestamp": "2025-09-10 02:23:28.635024", + "step": 3536, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:23:28.665441", + "step": 3536, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0066347974352538586, + "timestamp": "2025-09-10 02:23:28.667441", + "step": 3537, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:28.698524", + "step": 3537, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00013012031558901072, + "timestamp": "2025-09-10 02:23:28.703094", + "step": 3538, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 15187581968384 + }, + "timestamp": "2025-09-10 02:23:28.747230", + "step": 3538, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00027974756085313857, + "timestamp": "2025-09-10 02:23:28.764985", + "step": 3539, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:28.795795", + "step": 3539, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003288812702521682, + "timestamp": "2025-09-10 02:23:28.827880", + "step": 3540, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:28.860596", + "step": 3540, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002967107866425067, + "timestamp": "2025-09-10 02:23:28.862803", + "step": 3541, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:28.893979", + "step": 3541, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013638153905048966, + "timestamp": "2025-09-10 02:23:28.900819", + "step": 3542, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:28.931708", + "step": 3542, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009955601999536157, + "timestamp": "2025-09-10 02:23:28.938584", + "step": 3543, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:28.970094", + "step": 3543, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005556150339543819, + "timestamp": "2025-09-10 02:23:29.002133", + "step": 3544, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:23:29.032247", + "step": 3544, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003385456802789122, + "timestamp": "2025-09-10 02:23:29.034306", + "step": 3545, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:29.064533", + "step": 3545, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0076200878247618675, + "timestamp": "2025-09-10 02:23:29.071305", + "step": 3546, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:29.101681", + "step": 3546, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006286826450377703, + "timestamp": "2025-09-10 02:23:29.108661", + "step": 3547, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:29.139842", + "step": 3547, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003426824405323714, + "timestamp": "2025-09-10 02:23:29.173020", + "step": 3548, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:29.204891", + "step": 3548, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.034861672669649124, + "timestamp": "2025-09-10 02:23:29.209502", + "step": 3549, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:29.251602", + "step": 3549, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012050783261656761, + "timestamp": "2025-09-10 02:23:29.265034", + "step": 3550, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:29.301796", + "step": 3550, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0464249923825264, + "timestamp": "2025-09-10 02:23:29.309590", + "step": 3551, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 624 + ], + "flops": 18509808050496 + }, + "timestamp": "2025-09-10 02:23:29.366721", + "step": 3551, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006405732128769159, + "timestamp": "2025-09-10 02:23:29.409390", + "step": 3552, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:29.450769", + "step": 3552, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00038313533877953887, + "timestamp": "2025-09-10 02:23:29.458913", + "step": 3553, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:29.495471", + "step": 3553, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009717740467749536, + "timestamp": "2025-09-10 02:23:29.503209", + "step": 3554, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:23:29.546360", + "step": 3554, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007425061194226146, + "timestamp": "2025-09-10 02:23:29.561963", + "step": 3555, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:29.602489", + "step": 3555, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007218411774374545, + "timestamp": "2025-09-10 02:23:29.630453", + "step": 3556, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:29.673053", + "step": 3556, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036273004952818155, + "timestamp": "2025-09-10 02:23:29.677334", + "step": 3557, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:29.718240", + "step": 3557, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006142216734588146, + "timestamp": "2025-09-10 02:23:29.731639", + "step": 3558, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:23:29.781095", + "step": 3558, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007438276661559939, + "timestamp": "2025-09-10 02:23:29.798206", + "step": 3559, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:23:29.840536", + "step": 3559, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00014973794168327004, + "timestamp": "2025-09-10 02:23:29.875435", + "step": 3560, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:29.908439", + "step": 3560, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003213935415260494, + "timestamp": "2025-09-10 02:23:29.913349", + "step": 3561, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:29.947496", + "step": 3561, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009177852771244943, + "timestamp": "2025-09-10 02:23:29.959499", + "step": 3562, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:29.991532", + "step": 3562, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032881794031709433, + "timestamp": "2025-09-10 02:23:30.002537", + "step": 3563, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:30.033810", + "step": 3563, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004884254653006792, + "timestamp": "2025-09-10 02:23:30.065793", + "step": 3564, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:30.096997", + "step": 3564, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005217746831476688, + "timestamp": "2025-09-10 02:23:30.106937", + "step": 3565, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:30.137626", + "step": 3565, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002377876313403249, + "timestamp": "2025-09-10 02:23:30.141679", + "step": 3566, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:30.173799", + "step": 3566, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0286801066249609, + "timestamp": "2025-09-10 02:23:30.181406", + "step": 3567, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:30.218965", + "step": 3567, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025519220158457756, + "timestamp": "2025-09-10 02:23:30.250158", + "step": 3568, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:30.280235", + "step": 3568, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003562222118489444, + "timestamp": "2025-09-10 02:23:30.284802", + "step": 3569, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:30.316472", + "step": 3569, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013461283408105373, + "timestamp": "2025-09-10 02:23:30.323863", + "step": 3570, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:30.355122", + "step": 3570, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00494401203468442, + "timestamp": "2025-09-10 02:23:30.362015", + "step": 3571, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:30.394352", + "step": 3571, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000439615425420925, + "timestamp": "2025-09-10 02:23:30.425502", + "step": 3572, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:30.456922", + "step": 3572, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008407292771153152, + "timestamp": "2025-09-10 02:23:30.464692", + "step": 3573, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:30.495409", + "step": 3573, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009364182478748262, + "timestamp": "2025-09-10 02:23:30.499546", + "step": 3574, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:30.531237", + "step": 3574, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026412170380353928, + "timestamp": "2025-09-10 02:23:30.538705", + "step": 3575, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:30.569767", + "step": 3575, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028750034980475903, + "timestamp": "2025-09-10 02:23:30.598405", + "step": 3576, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:30.629371", + "step": 3576, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007422365597449243, + "timestamp": "2025-09-10 02:23:30.634494", + "step": 3577, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:23:30.664788", + "step": 3577, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00027479632990434766, + "timestamp": "2025-09-10 02:23:30.667506", + "step": 3578, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:30.697388", + "step": 3578, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005509444163180888, + "timestamp": "2025-09-10 02:23:30.701644", + "step": 3579, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:30.732510", + "step": 3579, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011080257827416062, + "timestamp": "2025-09-10 02:23:30.761032", + "step": 3580, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:30.792233", + "step": 3580, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001185481552965939, + "timestamp": "2025-09-10 02:23:30.802266", + "step": 3581, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:30.832648", + "step": 3581, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008745525847189128, + "timestamp": "2025-09-10 02:23:30.837004", + "step": 3582, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:30.870080", + "step": 3582, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021427616477012634, + "timestamp": "2025-09-10 02:23:30.881842", + "step": 3583, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:30.913345", + "step": 3583, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00033548500505276024, + "timestamp": "2025-09-10 02:23:30.938697", + "step": 3584, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:30.976287", + "step": 3584, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001324447919614613, + "timestamp": "2025-09-10 02:23:30.979084", + "step": 3585, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:31.014022", + "step": 3585, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027664339169859886, + "timestamp": "2025-09-10 02:23:31.022956", + "step": 3586, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:31.054239", + "step": 3586, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006238113855943084, + "timestamp": "2025-09-10 02:23:31.061602", + "step": 3587, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:31.092272", + "step": 3587, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007867095409892499, + "timestamp": "2025-09-10 02:23:31.120230", + "step": 3588, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:31.151875", + "step": 3588, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029622314497828484, + "timestamp": "2025-09-10 02:23:31.161735", + "step": 3589, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:31.193252", + "step": 3589, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00037610027357004583, + "timestamp": "2025-09-10 02:23:31.200375", + "step": 3590, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:31.230682", + "step": 3590, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002425598446279764, + "timestamp": "2025-09-10 02:23:31.234856", + "step": 3591, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:31.266565", + "step": 3591, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019675688818097115, + "timestamp": "2025-09-10 02:23:31.294566", + "step": 3592, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:31.326699", + "step": 3592, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001952952443389222, + "timestamp": "2025-09-10 02:23:31.331488", + "step": 3593, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:31.362721", + "step": 3593, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0218330230563879, + "timestamp": "2025-09-10 02:23:31.372828", + "step": 3594, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:31.406850", + "step": 3594, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0041700261645019054, + "timestamp": "2025-09-10 02:23:31.419032", + "step": 3595, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:31.451196", + "step": 3595, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004490002058446407, + "timestamp": "2025-09-10 02:23:31.483020", + "step": 3596, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:31.514251", + "step": 3596, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002926964545622468, + "timestamp": "2025-09-10 02:23:31.518753", + "step": 3597, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:31.550875", + "step": 3597, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00023523984418716282, + "timestamp": "2025-09-10 02:23:31.558371", + "step": 3598, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:31.590223", + "step": 3598, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004813474602997303, + "timestamp": "2025-09-10 02:23:31.597312", + "step": 3599, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:31.627946", + "step": 3599, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00039707665564492345, + "timestamp": "2025-09-10 02:23:31.655957", + "step": 3600, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:31.688467", + "step": 3600, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000567035167478025, + "timestamp": "2025-09-10 02:23:31.693782", + "step": 3601, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:31.724428", + "step": 3601, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015649326145648956, + "timestamp": "2025-09-10 02:23:31.731574", + "step": 3602, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:31.767124", + "step": 3602, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01300052274018526, + "timestamp": "2025-09-10 02:23:31.773836", + "step": 3603, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:31.805427", + "step": 3603, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022090671584010124, + "timestamp": "2025-09-10 02:23:31.833160", + "step": 3604, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:31.865752", + "step": 3604, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002304330002516508, + "timestamp": "2025-09-10 02:23:31.870759", + "step": 3605, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:31.901258", + "step": 3605, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03670268505811691, + "timestamp": "2025-09-10 02:23:31.905369", + "step": 3606, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:23:31.943717", + "step": 3606, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005911525222472847, + "timestamp": "2025-09-10 02:23:31.959384", + "step": 3607, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:31.992348", + "step": 3607, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006495547713711858, + "timestamp": "2025-09-10 02:23:32.023492", + "step": 3608, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:32.054908", + "step": 3608, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005558125558309257, + "timestamp": "2025-09-10 02:23:32.059342", + "step": 3609, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:32.090916", + "step": 3609, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0061562503688037395, + "timestamp": "2025-09-10 02:23:32.102900", + "step": 3610, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:32.134868", + "step": 3610, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008789503946900368, + "timestamp": "2025-09-10 02:23:32.144774", + "step": 3611, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:32.177089", + "step": 3611, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000540457374881953, + "timestamp": "2025-09-10 02:23:32.204872", + "step": 3612, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:32.236140", + "step": 3612, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006599615444429219, + "timestamp": "2025-09-10 02:23:32.240526", + "step": 3613, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:32.272703", + "step": 3613, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002096888143569231, + "timestamp": "2025-09-10 02:23:32.280357", + "step": 3614, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:32.311532", + "step": 3614, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028935037553310394, + "timestamp": "2025-09-10 02:23:32.315934", + "step": 3615, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:32.347081", + "step": 3615, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018138455925509334, + "timestamp": "2025-09-10 02:23:32.372354", + "step": 3616, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:32.404503", + "step": 3616, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006520233117043972, + "timestamp": "2025-09-10 02:23:32.409850", + "step": 3617, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:32.445950", + "step": 3617, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026026610285043716, + "timestamp": "2025-09-10 02:23:32.459332", + "step": 3618, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:23:32.499491", + "step": 3618, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011162899434566498, + "timestamp": "2025-09-10 02:23:32.515406", + "step": 3619, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:32.549195", + "step": 3619, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006067942013032734, + "timestamp": "2025-09-10 02:23:32.580873", + "step": 3620, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:32.612421", + "step": 3620, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009515011915937066, + "timestamp": "2025-09-10 02:23:32.617085", + "step": 3621, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:32.650398", + "step": 3621, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030977034475654364, + "timestamp": "2025-09-10 02:23:32.657133", + "step": 3622, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:32.690597", + "step": 3622, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023766574449837208, + "timestamp": "2025-09-10 02:23:32.701178", + "step": 3623, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:32.732882", + "step": 3623, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.050299059599637985, + "timestamp": "2025-09-10 02:23:32.766351", + "step": 3624, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:32.798823", + "step": 3624, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003462900349404663, + "timestamp": "2025-09-10 02:23:32.806489", + "step": 3625, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:32.841734", + "step": 3625, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03242403641343117, + "timestamp": "2025-09-10 02:23:32.854324", + "step": 3626, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:32.886966", + "step": 3626, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009655548492446542, + "timestamp": "2025-09-10 02:23:32.893816", + "step": 3627, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:32.925570", + "step": 3627, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011085288599133492, + "timestamp": "2025-09-10 02:23:32.957370", + "step": 3628, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 15187581968384 + }, + "timestamp": "2025-09-10 02:23:33.001453", + "step": 3628, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011424163822084665, + "timestamp": "2025-09-10 02:23:33.018706", + "step": 3629, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:23:33.059054", + "step": 3629, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027174891903996468, + "timestamp": "2025-09-10 02:23:33.074676", + "step": 3630, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:33.112865", + "step": 3630, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009898961521685123, + "timestamp": "2025-09-10 02:23:33.116705", + "step": 3631, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:33.151599", + "step": 3631, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001422611385351047, + "timestamp": "2025-09-10 02:23:33.178550", + "step": 3632, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:33.216383", + "step": 3632, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006423108279705048, + "timestamp": "2025-09-10 02:23:33.218613", + "step": 3633, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:33.250747", + "step": 3633, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006503048934973776, + "timestamp": "2025-09-10 02:23:33.254438", + "step": 3634, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:33.287142", + "step": 3634, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02117828093469143, + "timestamp": "2025-09-10 02:23:33.297447", + "step": 3635, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:33.329371", + "step": 3635, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025022621266543865, + "timestamp": "2025-09-10 02:23:33.356913", + "step": 3636, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:33.388951", + "step": 3636, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004223259165883064, + "timestamp": "2025-09-10 02:23:33.391527", + "step": 3637, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:23:33.426342", + "step": 3637, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03953773155808449, + "timestamp": "2025-09-10 02:23:33.429345", + "step": 3638, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:33.460347", + "step": 3638, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025826627388596535, + "timestamp": "2025-09-10 02:23:33.470332", + "step": 3639, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:23:33.505344", + "step": 3639, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006002942100167274, + "timestamp": "2025-09-10 02:23:33.540029", + "step": 3640, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:33.571550", + "step": 3640, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010516015812754631, + "timestamp": "2025-09-10 02:23:33.576171", + "step": 3641, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:33.606991", + "step": 3641, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020751620177179575, + "timestamp": "2025-09-10 02:23:33.613847", + "step": 3642, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:33.645478", + "step": 3642, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017879597144201398, + "timestamp": "2025-09-10 02:23:33.657529", + "step": 3643, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:33.689679", + "step": 3643, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004607823502738029, + "timestamp": "2025-09-10 02:23:33.722339", + "step": 3644, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:33.753387", + "step": 3644, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012203511781990528, + "timestamp": "2025-09-10 02:23:33.763904", + "step": 3645, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:33.797518", + "step": 3645, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037501014303416014, + "timestamp": "2025-09-10 02:23:33.810878", + "step": 3646, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:33.843012", + "step": 3646, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006922434084117413, + "timestamp": "2025-09-10 02:23:33.849942", + "step": 3647, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:33.880986", + "step": 3647, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018579624593257904, + "timestamp": "2025-09-10 02:23:33.908723", + "step": 3648, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:33.939821", + "step": 3648, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014378555351868272, + "timestamp": "2025-09-10 02:23:33.943463", + "step": 3649, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:33.975165", + "step": 3649, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004263308364897966, + "timestamp": "2025-09-10 02:23:33.985314", + "step": 3650, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:34.015655", + "step": 3650, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006144766230136156, + "timestamp": "2025-09-10 02:23:34.026553", + "step": 3651, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:23:34.057068", + "step": 3651, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022812257520854473, + "timestamp": "2025-09-10 02:23:34.080881", + "step": 3652, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:34.111599", + "step": 3652, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013019556179642677, + "timestamp": "2025-09-10 02:23:34.117102", + "step": 3653, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:23:34.151180", + "step": 3653, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008377458900213242, + "timestamp": "2025-09-10 02:23:34.164994", + "step": 3654, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:34.195131", + "step": 3654, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003772433614358306, + "timestamp": "2025-09-10 02:23:34.206283", + "step": 3655, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:34.236552", + "step": 3655, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017940301448106766, + "timestamp": "2025-09-10 02:23:34.262014", + "step": 3656, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:34.294442", + "step": 3656, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0058494978584349155, + "timestamp": "2025-09-10 02:23:34.298294", + "step": 3657, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:34.330418", + "step": 3657, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02427353337407112, + "timestamp": "2025-09-10 02:23:34.342087", + "step": 3658, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:23:34.383500", + "step": 3658, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017965204315260053, + "timestamp": "2025-09-10 02:23:34.400621", + "step": 3659, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:34.431989", + "step": 3659, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022874141577631235, + "timestamp": "2025-09-10 02:23:34.459435", + "step": 3660, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:34.490403", + "step": 3660, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001652199076488614, + "timestamp": "2025-09-10 02:23:34.495126", + "step": 3661, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:23:34.534987", + "step": 3661, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002010711934417486, + "timestamp": "2025-09-10 02:23:34.550930", + "step": 3662, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:23:34.582182", + "step": 3662, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009694479405879974, + "timestamp": "2025-09-10 02:23:34.584510", + "step": 3663, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:34.616529", + "step": 3663, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007335420232266188, + "timestamp": "2025-09-10 02:23:34.647418", + "step": 3664, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:34.679314", + "step": 3664, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01509284321218729, + "timestamp": "2025-09-10 02:23:34.691955", + "step": 3665, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:34.722912", + "step": 3665, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03952633589506149, + "timestamp": "2025-09-10 02:23:34.730783", + "step": 3666, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:34.760989", + "step": 3666, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006663356442004442, + "timestamp": "2025-09-10 02:23:34.765594", + "step": 3667, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:34.796418", + "step": 3667, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018776053562760353, + "timestamp": "2025-09-10 02:23:34.824940", + "step": 3668, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:34.855613", + "step": 3668, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00801047496497631, + "timestamp": "2025-09-10 02:23:34.863532", + "step": 3669, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:34.894177", + "step": 3669, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03632910177111626, + "timestamp": "2025-09-10 02:23:34.905046", + "step": 3670, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:34.936404", + "step": 3670, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014353514648973942, + "timestamp": "2025-09-10 02:23:34.948727", + "step": 3671, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:34.980357", + "step": 3671, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00914282537996769, + "timestamp": "2025-09-10 02:23:35.007911", + "step": 3672, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:35.039344", + "step": 3672, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020550028420984745, + "timestamp": "2025-09-10 02:23:35.047649", + "step": 3673, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:23:35.082542", + "step": 3673, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013698196271434426, + "timestamp": "2025-09-10 02:23:35.096409", + "step": 3674, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:23:35.135446", + "step": 3674, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010654388461261988, + "timestamp": "2025-09-10 02:23:35.151603", + "step": 3675, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:23:45.394972", + "step": 3675, + "epoch": 2 + }, + { + "type": "pplx", + "content": 21047841.64222782, + "timestamp": "2025-09-10 02:23:45.398778", + "step": 3675, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:45.430060", + "step": 3675, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022460322361439466, + "timestamp": "2025-09-10 02:23:45.456947", + "step": 3676, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:45.488804", + "step": 3676, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00032661884324625134, + "timestamp": "2025-09-10 02:23:45.498535", + "step": 3677, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:45.531376", + "step": 3677, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034290028270334005, + "timestamp": "2025-09-10 02:23:45.538293", + "step": 3678, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:45.569174", + "step": 3678, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001095048151910305, + "timestamp": "2025-09-10 02:23:45.576755", + "step": 3679, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:45.607998", + "step": 3679, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005293484777212143, + "timestamp": "2025-09-10 02:23:45.641456", + "step": 3680, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:45.673169", + "step": 3680, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005093970336019993, + "timestamp": "2025-09-10 02:23:45.685830", + "step": 3681, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:45.716501", + "step": 3681, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007117848144844174, + "timestamp": "2025-09-10 02:23:45.728651", + "step": 3682, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:45.759255", + "step": 3682, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00283992663025856, + "timestamp": "2025-09-10 02:23:45.770063", + "step": 3683, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:45.803452", + "step": 3683, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.029055939987301826, + "timestamp": "2025-09-10 02:23:45.837780", + "step": 3684, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:45.868244", + "step": 3684, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000742242147680372, + "timestamp": "2025-09-10 02:23:45.873656", + "step": 3685, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:45.903744", + "step": 3685, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007362429518252611, + "timestamp": "2025-09-10 02:23:45.910834", + "step": 3686, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:45.941967", + "step": 3686, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004590542521327734, + "timestamp": "2025-09-10 02:23:45.954565", + "step": 3687, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:45.987952", + "step": 3687, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010698004625737667, + "timestamp": "2025-09-10 02:23:46.022265", + "step": 3688, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:46.053140", + "step": 3688, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017821193905547261, + "timestamp": "2025-09-10 02:23:46.055294", + "step": 3689, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:46.085878", + "step": 3689, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010044259950518608, + "timestamp": "2025-09-10 02:23:46.097980", + "step": 3690, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:46.129333", + "step": 3690, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010982693638652563, + "timestamp": "2025-09-10 02:23:46.140338", + "step": 3691, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:23:46.175154", + "step": 3691, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005969376303255558, + "timestamp": "2025-09-10 02:23:46.209750", + "step": 3692, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:46.241338", + "step": 3692, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015076607232913375, + "timestamp": "2025-09-10 02:23:46.253943", + "step": 3693, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:46.285232", + "step": 3693, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018409850308671594, + "timestamp": "2025-09-10 02:23:46.289800", + "step": 3694, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:46.319885", + "step": 3694, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012192835565656424, + "timestamp": "2025-09-10 02:23:46.326508", + "step": 3695, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:46.359303", + "step": 3695, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002559660468250513, + "timestamp": "2025-09-10 02:23:46.386965", + "step": 3696, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:46.426410", + "step": 3696, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033039411064237356, + "timestamp": "2025-09-10 02:23:46.436026", + "step": 3697, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:46.472303", + "step": 3697, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00010749106149887666, + "timestamp": "2025-09-10 02:23:46.482552", + "step": 3698, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:46.513260", + "step": 3698, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025171758607029915, + "timestamp": "2025-09-10 02:23:46.523468", + "step": 3699, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:46.553613", + "step": 3699, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031803150195628405, + "timestamp": "2025-09-10 02:23:46.581832", + "step": 3700, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:46.612784", + "step": 3700, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00467759370803833, + "timestamp": "2025-09-10 02:23:46.622624", + "step": 3701, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:46.652511", + "step": 3701, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028708036988973618, + "timestamp": "2025-09-10 02:23:46.657095", + "step": 3702, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:46.688080", + "step": 3702, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009561380720697343, + "timestamp": "2025-09-10 02:23:46.699122", + "step": 3703, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:46.730072", + "step": 3703, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001121461158618331, + "timestamp": "2025-09-10 02:23:46.757739", + "step": 3704, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:46.788304", + "step": 3704, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005151792895048857, + "timestamp": "2025-09-10 02:23:46.793886", + "step": 3705, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:46.825904", + "step": 3705, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00032375051523558795, + "timestamp": "2025-09-10 02:23:46.835936", + "step": 3706, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:46.867444", + "step": 3706, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005311047192662954, + "timestamp": "2025-09-10 02:23:46.874324", + "step": 3707, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:46.904803", + "step": 3707, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018711045384407043, + "timestamp": "2025-09-10 02:23:46.938269", + "step": 3708, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:23:46.970344", + "step": 3708, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00399330398067832, + "timestamp": "2025-09-10 02:23:46.983435", + "step": 3709, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:47.015316", + "step": 3709, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006306317518465221, + "timestamp": "2025-09-10 02:23:47.027878", + "step": 3710, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:47.061711", + "step": 3710, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00018934406398329884, + "timestamp": "2025-09-10 02:23:47.071989", + "step": 3711, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:23:47.104274", + "step": 3711, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013638163916766644, + "timestamp": "2025-09-10 02:23:47.128327", + "step": 3712, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:47.162414", + "step": 3712, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.031886231154203415, + "timestamp": "2025-09-10 02:23:47.166905", + "step": 3713, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:23:47.204158", + "step": 3713, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013043539365753531, + "timestamp": "2025-09-10 02:23:47.217908", + "step": 3714, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:47.253639", + "step": 3714, + "epoch": 2 + }, + { + "type": "loss", + "content": 5.598477218882181e-05, + "timestamp": "2025-09-10 02:23:47.257454", + "step": 3715, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:47.294393", + "step": 3715, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00044489253195934, + "timestamp": "2025-09-10 02:23:47.327746", + "step": 3716, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:47.363529", + "step": 3716, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010741885052993894, + "timestamp": "2025-09-10 02:23:47.368139", + "step": 3717, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:47.418380", + "step": 3717, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003896048292517662, + "timestamp": "2025-09-10 02:23:47.428484", + "step": 3718, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:47.470970", + "step": 3718, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002632845425978303, + "timestamp": "2025-09-10 02:23:47.483553", + "step": 3719, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:23:47.529925", + "step": 3719, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023609664291143417, + "timestamp": "2025-09-10 02:23:47.564504", + "step": 3720, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:47.605360", + "step": 3720, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018469881266355515, + "timestamp": "2025-09-10 02:23:47.613216", + "step": 3721, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:23:47.657401", + "step": 3721, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002696176525205374, + "timestamp": "2025-09-10 02:23:47.671202", + "step": 3722, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:47.703080", + "step": 3722, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01182649191468954, + "timestamp": "2025-09-10 02:23:47.710965", + "step": 3723, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:47.741115", + "step": 3723, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016434434801340103, + "timestamp": "2025-09-10 02:23:47.769176", + "step": 3724, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:47.801840", + "step": 3724, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009728246368467808, + "timestamp": "2025-09-10 02:23:47.809393", + "step": 3725, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:47.842116", + "step": 3725, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011867971625179052, + "timestamp": "2025-09-10 02:23:47.849032", + "step": 3726, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:47.885747", + "step": 3726, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010371259413659573, + "timestamp": "2025-09-10 02:23:47.899151", + "step": 3727, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:47.932148", + "step": 3727, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00210155313834548, + "timestamp": "2025-09-10 02:23:47.962867", + "step": 3728, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:23:47.995654", + "step": 3728, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019191886531189084, + "timestamp": "2025-09-10 02:23:48.008286", + "step": 3729, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:48.039506", + "step": 3729, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00013749170466326177, + "timestamp": "2025-09-10 02:23:48.044059", + "step": 3730, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:48.077114", + "step": 3730, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002893412485718727, + "timestamp": "2025-09-10 02:23:48.084814", + "step": 3731, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:48.117583", + "step": 3731, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007469491683878005, + "timestamp": "2025-09-10 02:23:48.146222", + "step": 3732, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:48.178024", + "step": 3732, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003932146355509758, + "timestamp": "2025-09-10 02:23:48.186459", + "step": 3733, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:23:48.218119", + "step": 3733, + "epoch": 2 + }, + { + "type": "loss", + "content": 8.557453838875517e-05, + "timestamp": "2025-09-10 02:23:48.221036", + "step": 3734, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:48.253673", + "step": 3734, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002127924090018496, + "timestamp": "2025-09-10 02:23:48.263724", + "step": 3735, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:23:48.295365", + "step": 3735, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00040841306326910853, + "timestamp": "2025-09-10 02:23:48.320578", + "step": 3736, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 624 + ], + "flops": 18509808050496 + }, + "timestamp": "2025-09-10 02:23:48.369927", + "step": 3736, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010468022665008903, + "timestamp": "2025-09-10 02:23:48.391699", + "step": 3737, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:48.421601", + "step": 3737, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001885231613414362, + "timestamp": "2025-09-10 02:23:48.428506", + "step": 3738, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:48.458297", + "step": 3738, + "epoch": 2 + }, + { + "type": "loss", + "content": 3.569291584426537e-05, + "timestamp": "2025-09-10 02:23:48.462466", + "step": 3739, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:23:48.505255", + "step": 3739, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003039777046069503, + "timestamp": "2025-09-10 02:23:48.543505", + "step": 3740, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:48.574502", + "step": 3740, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006935932207852602, + "timestamp": "2025-09-10 02:23:48.584997", + "step": 3741, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:48.619172", + "step": 3741, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013960804790258408, + "timestamp": "2025-09-10 02:23:48.626071", + "step": 3742, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:48.660000", + "step": 3742, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006007336778566241, + "timestamp": "2025-09-10 02:23:48.664017", + "step": 3743, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:48.695629", + "step": 3743, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013558273203670979, + "timestamp": "2025-09-10 02:23:48.727061", + "step": 3744, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:48.758708", + "step": 3744, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007245481247082353, + "timestamp": "2025-09-10 02:23:48.761187", + "step": 3745, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:48.791751", + "step": 3745, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05813758820295334, + "timestamp": "2025-09-10 02:23:48.798627", + "step": 3746, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:48.839439", + "step": 3746, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002415095950709656, + "timestamp": "2025-09-10 02:23:48.845944", + "step": 3747, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:48.877586", + "step": 3747, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000962139165494591, + "timestamp": "2025-09-10 02:23:48.905331", + "step": 3748, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:23:48.936723", + "step": 3748, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019527226686477661, + "timestamp": "2025-09-10 02:23:48.938587", + "step": 3749, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:48.968844", + "step": 3749, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019534730818122625, + "timestamp": "2025-09-10 02:23:48.975883", + "step": 3750, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:49.007630", + "step": 3750, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006131255067884922, + "timestamp": "2025-09-10 02:23:49.020206", + "step": 3751, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:23:49.058993", + "step": 3751, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003716005012392998, + "timestamp": "2025-09-10 02:23:49.095806", + "step": 3752, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:49.126990", + "step": 3752, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004358034930191934, + "timestamp": "2025-09-10 02:23:49.134452", + "step": 3753, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:49.165481", + "step": 3753, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00044732578680850565, + "timestamp": "2025-09-10 02:23:49.169363", + "step": 3754, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:49.201372", + "step": 3754, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004059514496475458, + "timestamp": "2025-09-10 02:23:49.208648", + "step": 3755, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:49.240352", + "step": 3755, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000174950881046243, + "timestamp": "2025-09-10 02:23:49.265504", + "step": 3756, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:23:49.304855", + "step": 3756, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003618494840338826, + "timestamp": "2025-09-10 02:23:49.307253", + "step": 3757, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:49.338656", + "step": 3757, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000870470714289695, + "timestamp": "2025-09-10 02:23:49.345475", + "step": 3758, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:49.380400", + "step": 3758, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002803669194690883, + "timestamp": "2025-09-10 02:23:49.388113", + "step": 3759, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:49.419299", + "step": 3759, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02521214261651039, + "timestamp": "2025-09-10 02:23:49.449879", + "step": 3760, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:49.480870", + "step": 3760, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03576240316033363, + "timestamp": "2025-09-10 02:23:49.483063", + "step": 3761, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 18035204324480 + }, + "timestamp": "2025-09-10 02:23:49.534629", + "step": 3761, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.043033067137002945, + "timestamp": "2025-09-10 02:23:49.556117", + "step": 3762, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:49.587429", + "step": 3762, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002879269886761904, + "timestamp": "2025-09-10 02:23:49.591819", + "step": 3763, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:49.622714", + "step": 3763, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011223888956010342, + "timestamp": "2025-09-10 02:23:49.647657", + "step": 3764, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:49.678505", + "step": 3764, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006490662926808, + "timestamp": "2025-09-10 02:23:49.680403", + "step": 3765, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:49.711153", + "step": 3765, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002436300739645958, + "timestamp": "2025-09-10 02:23:49.718328", + "step": 3766, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:49.749636", + "step": 3766, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03835447505116463, + "timestamp": "2025-09-10 02:23:49.753874", + "step": 3767, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:49.785074", + "step": 3767, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001985372742637992, + "timestamp": "2025-09-10 02:23:49.813463", + "step": 3768, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:49.845131", + "step": 3768, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005818284582346678, + "timestamp": "2025-09-10 02:23:49.847905", + "step": 3769, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:49.878162", + "step": 3769, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013168009463697672, + "timestamp": "2025-09-10 02:23:49.885062", + "step": 3770, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:49.918288", + "step": 3770, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015119427116587758, + "timestamp": "2025-09-10 02:23:49.925676", + "step": 3771, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:49.959836", + "step": 3771, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005293331341817975, + "timestamp": "2025-09-10 02:23:49.987984", + "step": 3772, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:50.019957", + "step": 3772, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002255245781270787, + "timestamp": "2025-09-10 02:23:50.029337", + "step": 3773, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:50.060821", + "step": 3773, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010724698659032583, + "timestamp": "2025-09-10 02:23:50.068255", + "step": 3774, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:50.099297", + "step": 3774, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030956987757235765, + "timestamp": "2025-09-10 02:23:50.105997", + "step": 3775, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:50.138577", + "step": 3775, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003467730712145567, + "timestamp": "2025-09-10 02:23:50.170347", + "step": 3776, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:50.201636", + "step": 3776, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005490719340741634, + "timestamp": "2025-09-10 02:23:50.206811", + "step": 3777, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:50.239883", + "step": 3777, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002172060776501894, + "timestamp": "2025-09-10 02:23:50.247433", + "step": 3778, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:50.278787", + "step": 3778, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002384532243013382, + "timestamp": "2025-09-10 02:23:50.286019", + "step": 3779, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:50.320002", + "step": 3779, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02834930457174778, + "timestamp": "2025-09-10 02:23:50.347846", + "step": 3780, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:50.378714", + "step": 3780, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006114445626735687, + "timestamp": "2025-09-10 02:23:50.381081", + "step": 3781, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:50.411840", + "step": 3781, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004408101085573435, + "timestamp": "2025-09-10 02:23:50.419203", + "step": 3782, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:50.449842", + "step": 3782, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024946555495262146, + "timestamp": "2025-09-10 02:23:50.457195", + "step": 3783, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:23:50.489297", + "step": 3783, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01270032487809658, + "timestamp": "2025-09-10 02:23:50.521844", + "step": 3784, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:50.553321", + "step": 3784, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033720643259584904, + "timestamp": "2025-09-10 02:23:50.558695", + "step": 3785, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:50.589742", + "step": 3785, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05379801243543625, + "timestamp": "2025-09-10 02:23:50.596659", + "step": 3786, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:50.627235", + "step": 3786, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017733937129378319, + "timestamp": "2025-09-10 02:23:50.634247", + "step": 3787, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 19459015502528 + }, + "timestamp": "2025-09-10 02:23:50.689983", + "step": 3787, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037569236010313034, + "timestamp": "2025-09-10 02:23:50.734334", + "step": 3788, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:50.765113", + "step": 3788, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019782905001193285, + "timestamp": "2025-09-10 02:23:50.769424", + "step": 3789, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:50.800084", + "step": 3789, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013935952447354794, + "timestamp": "2025-09-10 02:23:50.807050", + "step": 3790, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:50.847666", + "step": 3790, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01583055406808853, + "timestamp": "2025-09-10 02:23:50.852158", + "step": 3791, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:50.882829", + "step": 3791, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005747873219661415, + "timestamp": "2025-09-10 02:23:50.914311", + "step": 3792, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:50.947726", + "step": 3792, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003728387819137424, + "timestamp": "2025-09-10 02:23:50.955043", + "step": 3793, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:50.985737", + "step": 3793, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017032746691256762, + "timestamp": "2025-09-10 02:23:50.989812", + "step": 3794, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:23:51.021167", + "step": 3794, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027263087686151266, + "timestamp": "2025-09-10 02:23:51.023598", + "step": 3795, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:51.053872", + "step": 3795, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019095286261290312, + "timestamp": "2025-09-10 02:23:51.081640", + "step": 3796, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:51.113023", + "step": 3796, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0055357408709824085, + "timestamp": "2025-09-10 02:23:51.120842", + "step": 3797, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:51.152210", + "step": 3797, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00228920322842896, + "timestamp": "2025-09-10 02:23:51.162997", + "step": 3798, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:51.193944", + "step": 3798, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029537074733525515, + "timestamp": "2025-09-10 02:23:51.200712", + "step": 3799, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:51.231792", + "step": 3799, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009542775340378284, + "timestamp": "2025-09-10 02:23:51.259863", + "step": 3800, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:51.292129", + "step": 3800, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010026551317423582, + "timestamp": "2025-09-10 02:23:51.297291", + "step": 3801, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:51.328429", + "step": 3801, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001947238779393956, + "timestamp": "2025-09-10 02:23:51.335239", + "step": 3802, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:51.366633", + "step": 3802, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014312856364995241, + "timestamp": "2025-09-10 02:23:51.373403", + "step": 3803, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:51.404802", + "step": 3803, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010877539170905948, + "timestamp": "2025-09-10 02:23:51.435519", + "step": 3804, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:23:51.472136", + "step": 3804, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0061668092384934425, + "timestamp": "2025-09-10 02:23:51.487525", + "step": 3805, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:51.519845", + "step": 3805, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013660427648574114, + "timestamp": "2025-09-10 02:23:51.531967", + "step": 3806, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:51.564750", + "step": 3806, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00044723015162162483, + "timestamp": "2025-09-10 02:23:51.574188", + "step": 3807, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:23:51.606066", + "step": 3807, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004913516459055245, + "timestamp": "2025-09-10 02:23:51.633627", + "step": 3808, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:23:51.665052", + "step": 3808, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005763310939073563, + "timestamp": "2025-09-10 02:23:51.672360", + "step": 3809, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:51.704493", + "step": 3809, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034396513365209103, + "timestamp": "2025-09-10 02:23:51.711547", + "step": 3810, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:23:51.742543", + "step": 3810, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012839804403483868, + "timestamp": "2025-09-10 02:23:51.754889", + "step": 3811, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:51.786891", + "step": 3811, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012081711320206523, + "timestamp": "2025-09-10 02:23:51.818243", + "step": 3812, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:51.849541", + "step": 3812, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012805964797735214, + "timestamp": "2025-09-10 02:23:51.854395", + "step": 3813, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:51.892405", + "step": 3813, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016190716996788979, + "timestamp": "2025-09-10 02:23:51.902950", + "step": 3814, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:51.934567", + "step": 3814, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00160274060908705, + "timestamp": "2025-09-10 02:23:51.941937", + "step": 3815, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:23:51.973443", + "step": 3815, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007644836790859699, + "timestamp": "2025-09-10 02:23:52.000824", + "step": 3816, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:23:52.034481", + "step": 3816, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.030178042128682137, + "timestamp": "2025-09-10 02:23:52.037320", + "step": 3817, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:23:52.069303", + "step": 3817, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00040458128205500543, + "timestamp": "2025-09-10 02:23:52.076280", + "step": 3818, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:23:52.107864", + "step": 3818, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021405117586255074, + "timestamp": "2025-09-10 02:23:52.115515", + "step": 3819, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:23:52.146252", + "step": 3819, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013643233105540276, + "timestamp": "2025-09-10 02:23:52.177599", + "step": 3820, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:52.209358", + "step": 3820, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004835444502532482, + "timestamp": "2025-09-10 02:23:52.211571", + "step": 3821, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:23:52.242137", + "step": 3821, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022363480180501938, + "timestamp": "2025-09-10 02:23:52.246433", + "step": 3822, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:24:02.339761", + "step": 3822, + "epoch": 2 + }, + { + "type": "pplx", + "content": 19433033.667341556, + "timestamp": "2025-09-10 02:24:02.342208", + "step": 3822, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:24:02.381828", + "step": 3822, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.043930236250162125, + "timestamp": "2025-09-10 02:24:02.399123", + "step": 3823, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:24:02.438505", + "step": 3823, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004557534120976925, + "timestamp": "2025-09-10 02:24:02.474955", + "step": 3824, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:02.509196", + "step": 3824, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007657821988686919, + "timestamp": "2025-09-10 02:24:02.513420", + "step": 3825, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:02.546830", + "step": 3825, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007892725057899952, + "timestamp": "2025-09-10 02:24:02.558785", + "step": 3826, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:24:02.599847", + "step": 3826, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026977970264852047, + "timestamp": "2025-09-10 02:24:02.616867", + "step": 3827, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:02.647816", + "step": 3827, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012466126354411244, + "timestamp": "2025-09-10 02:24:02.675701", + "step": 3828, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 13289167064320 + }, + "timestamp": "2025-09-10 02:24:02.712249", + "step": 3828, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026414524763822556, + "timestamp": "2025-09-10 02:24:02.728133", + "step": 3829, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:02.761149", + "step": 3829, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000412534165661782, + "timestamp": "2025-09-10 02:24:02.766841", + "step": 3830, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:02.801406", + "step": 3830, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004566606599837542, + "timestamp": "2025-09-10 02:24:02.810504", + "step": 3831, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:02.842542", + "step": 3831, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003999842330813408, + "timestamp": "2025-09-10 02:24:02.874692", + "step": 3832, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:02.907490", + "step": 3832, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.028634166345000267, + "timestamp": "2025-09-10 02:24:02.915127", + "step": 3833, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:02.947307", + "step": 3833, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010636513121426105, + "timestamp": "2025-09-10 02:24:02.950689", + "step": 3834, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:02.982711", + "step": 3834, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00048292818246409297, + "timestamp": "2025-09-10 02:24:02.993519", + "step": 3835, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:03.025164", + "step": 3835, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005672777188010514, + "timestamp": "2025-09-10 02:24:03.052537", + "step": 3836, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:03.084203", + "step": 3836, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003444700560066849, + "timestamp": "2025-09-10 02:24:03.093537", + "step": 3837, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:03.125051", + "step": 3837, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015334226191043854, + "timestamp": "2025-09-10 02:24:03.135462", + "step": 3838, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:03.166961", + "step": 3838, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006245824624784291, + "timestamp": "2025-09-10 02:24:03.177542", + "step": 3839, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:03.208216", + "step": 3839, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002433412882965058, + "timestamp": "2025-09-10 02:24:03.241567", + "step": 3840, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:03.273090", + "step": 3840, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006837646360509098, + "timestamp": "2025-09-10 02:24:03.277420", + "step": 3841, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:03.308079", + "step": 3841, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020594163797795773, + "timestamp": "2025-09-10 02:24:03.312065", + "step": 3842, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:03.342985", + "step": 3842, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023391323629766703, + "timestamp": "2025-09-10 02:24:03.347097", + "step": 3843, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:03.377572", + "step": 3843, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00048196568968705833, + "timestamp": "2025-09-10 02:24:03.402366", + "step": 3844, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:03.433467", + "step": 3844, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002800496993586421, + "timestamp": "2025-09-10 02:24:03.435938", + "step": 3845, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:03.466018", + "step": 3845, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011212360113859177, + "timestamp": "2025-09-10 02:24:03.469910", + "step": 3846, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:24:03.503751", + "step": 3846, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017739442409947515, + "timestamp": "2025-09-10 02:24:03.517821", + "step": 3847, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:24:03.548066", + "step": 3847, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002876395359635353, + "timestamp": "2025-09-10 02:24:03.571371", + "step": 3848, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:03.601521", + "step": 3848, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032014145981520414, + "timestamp": "2025-09-10 02:24:03.605727", + "step": 3849, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:03.636647", + "step": 3849, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001701483502984047, + "timestamp": "2025-09-10 02:24:03.646311", + "step": 3850, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:03.677513", + "step": 3850, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01332316268235445, + "timestamp": "2025-09-10 02:24:03.684806", + "step": 3851, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:03.715898", + "step": 3851, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010636200197041035, + "timestamp": "2025-09-10 02:24:03.740412", + "step": 3852, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:03.772090", + "step": 3852, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018638168694451451, + "timestamp": "2025-09-10 02:24:03.778728", + "step": 3853, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:03.810931", + "step": 3853, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011919804383069277, + "timestamp": "2025-09-10 02:24:03.820628", + "step": 3854, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:03.854201", + "step": 3854, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002363163512200117, + "timestamp": "2025-09-10 02:24:03.860738", + "step": 3855, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:03.892534", + "step": 3855, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008571963990107179, + "timestamp": "2025-09-10 02:24:03.916324", + "step": 3856, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:03.947688", + "step": 3856, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001244921120814979, + "timestamp": "2025-09-10 02:24:03.949591", + "step": 3857, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:03.979801", + "step": 3857, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029732866678386927, + "timestamp": "2025-09-10 02:24:03.986458", + "step": 3858, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:04.018877", + "step": 3858, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002771953120827675, + "timestamp": "2025-09-10 02:24:04.028498", + "step": 3859, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:04.060667", + "step": 3859, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005387973506003618, + "timestamp": "2025-09-10 02:24:04.088671", + "step": 3860, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:24:04.127687", + "step": 3860, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001176676363684237, + "timestamp": "2025-09-10 02:24:04.144668", + "step": 3861, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:04.177813", + "step": 3861, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007978384965099394, + "timestamp": "2025-09-10 02:24:04.184388", + "step": 3862, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:04.219122", + "step": 3862, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02228599414229393, + "timestamp": "2025-09-10 02:24:04.226335", + "step": 3863, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:04.258575", + "step": 3863, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003167739836499095, + "timestamp": "2025-09-10 02:24:04.283761", + "step": 3864, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:24:04.321725", + "step": 3864, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012020551366731524, + "timestamp": "2025-09-10 02:24:04.337366", + "step": 3865, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:04.372743", + "step": 3865, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009655249887146056, + "timestamp": "2025-09-10 02:24:04.383198", + "step": 3866, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:04.423189", + "step": 3866, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005044231074862182, + "timestamp": "2025-09-10 02:24:04.430487", + "step": 3867, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:04.468856", + "step": 3867, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006265141419135034, + "timestamp": "2025-09-10 02:24:04.496358", + "step": 3868, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:04.535109", + "step": 3868, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006974139832891524, + "timestamp": "2025-09-10 02:24:04.539510", + "step": 3869, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:04.576441", + "step": 3869, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025091536808758974, + "timestamp": "2025-09-10 02:24:04.583740", + "step": 3870, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:04.616020", + "step": 3870, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002585696056485176, + "timestamp": "2025-09-10 02:24:04.623367", + "step": 3871, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:04.653411", + "step": 3871, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017903503612615168, + "timestamp": "2025-09-10 02:24:04.676942", + "step": 3872, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:04.709069", + "step": 3872, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017580740386620164, + "timestamp": "2025-09-10 02:24:04.718216", + "step": 3873, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:04.749475", + "step": 3873, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017524746945127845, + "timestamp": "2025-09-10 02:24:04.755997", + "step": 3874, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:04.787618", + "step": 3874, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003729330201167613, + "timestamp": "2025-09-10 02:24:04.797160", + "step": 3875, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:04.828740", + "step": 3875, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002224268391728401, + "timestamp": "2025-09-10 02:24:04.861938", + "step": 3876, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:04.893491", + "step": 3876, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009001967846415937, + "timestamp": "2025-09-10 02:24:04.897745", + "step": 3877, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:04.930422", + "step": 3877, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016282566357403994, + "timestamp": "2025-09-10 02:24:04.937753", + "step": 3878, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:04.969288", + "step": 3878, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.040085725486278534, + "timestamp": "2025-09-10 02:24:04.972968", + "step": 3879, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:05.004246", + "step": 3879, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017969904001802206, + "timestamp": "2025-09-10 02:24:05.032418", + "step": 3880, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:05.064709", + "step": 3880, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019167568534612656, + "timestamp": "2025-09-10 02:24:05.069317", + "step": 3881, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:05.100668", + "step": 3881, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000999232055619359, + "timestamp": "2025-09-10 02:24:05.108173", + "step": 3882, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:24:05.140099", + "step": 3882, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007008261163718998, + "timestamp": "2025-09-10 02:24:05.142449", + "step": 3883, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:24:05.180595", + "step": 3883, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016444515204057097, + "timestamp": "2025-09-10 02:24:05.217354", + "step": 3884, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:05.251315", + "step": 3884, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006205525132827461, + "timestamp": "2025-09-10 02:24:05.259496", + "step": 3885, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:05.290788", + "step": 3885, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010018015746027231, + "timestamp": "2025-09-10 02:24:05.297304", + "step": 3886, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:05.329551", + "step": 3886, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008122866274788976, + "timestamp": "2025-09-10 02:24:05.336204", + "step": 3887, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:05.367674", + "step": 3887, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004226117394864559, + "timestamp": "2025-09-10 02:24:05.398780", + "step": 3888, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:05.431026", + "step": 3888, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002288134302943945, + "timestamp": "2025-09-10 02:24:05.435599", + "step": 3889, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:24:05.477036", + "step": 3889, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027158772572875023, + "timestamp": "2025-09-10 02:24:05.494244", + "step": 3890, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:05.525912", + "step": 3890, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001486063003540039, + "timestamp": "2025-09-10 02:24:05.535115", + "step": 3891, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:05.566775", + "step": 3891, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014154304517433047, + "timestamp": "2025-09-10 02:24:05.591814", + "step": 3892, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:05.623348", + "step": 3892, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015531855635344982, + "timestamp": "2025-09-10 02:24:05.627610", + "step": 3893, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:05.658895", + "step": 3893, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005819514626637101, + "timestamp": "2025-09-10 02:24:05.661365", + "step": 3894, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:05.692262", + "step": 3894, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014906743308529258, + "timestamp": "2025-09-10 02:24:05.696070", + "step": 3895, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:05.726485", + "step": 3895, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00021287697018124163, + "timestamp": "2025-09-10 02:24:05.757225", + "step": 3896, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:05.787897", + "step": 3896, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001206160755828023, + "timestamp": "2025-09-10 02:24:05.793073", + "step": 3897, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:05.824256", + "step": 3897, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016391824465245008, + "timestamp": "2025-09-10 02:24:05.826387", + "step": 3898, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:05.857451", + "step": 3898, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0073780762031674385, + "timestamp": "2025-09-10 02:24:05.868009", + "step": 3899, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:05.902995", + "step": 3899, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006778707611374557, + "timestamp": "2025-09-10 02:24:05.930468", + "step": 3900, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:05.961954", + "step": 3900, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003114322025794536, + "timestamp": "2025-09-10 02:24:05.969241", + "step": 3901, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:24:06.002522", + "step": 3901, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023112166672945023, + "timestamp": "2025-09-10 02:24:06.016152", + "step": 3902, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:06.047931", + "step": 3902, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018408901523798704, + "timestamp": "2025-09-10 02:24:06.054449", + "step": 3903, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:06.085284", + "step": 3903, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006988499662838876, + "timestamp": "2025-09-10 02:24:06.110276", + "step": 3904, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:06.142214", + "step": 3904, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008732205023989081, + "timestamp": "2025-09-10 02:24:06.149419", + "step": 3905, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:06.180067", + "step": 3905, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010088557610288262, + "timestamp": "2025-09-10 02:24:06.187424", + "step": 3906, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:06.219238", + "step": 3906, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009297534124925733, + "timestamp": "2025-09-10 02:24:06.223486", + "step": 3907, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:06.254588", + "step": 3907, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002321895444765687, + "timestamp": "2025-09-10 02:24:06.286023", + "step": 3908, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:06.317271", + "step": 3908, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007969407364726067, + "timestamp": "2025-09-10 02:24:06.319873", + "step": 3909, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:06.350925", + "step": 3909, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036575000267475843, + "timestamp": "2025-09-10 02:24:06.357603", + "step": 3910, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:06.389054", + "step": 3910, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004755291156470776, + "timestamp": "2025-09-10 02:24:06.395946", + "step": 3911, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:06.427525", + "step": 3911, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007069699349813163, + "timestamp": "2025-09-10 02:24:06.452159", + "step": 3912, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:06.483179", + "step": 3912, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02136976644396782, + "timestamp": "2025-09-10 02:24:06.493098", + "step": 3913, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:06.523811", + "step": 3913, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01693398505449295, + "timestamp": "2025-09-10 02:24:06.531364", + "step": 3914, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:06.563605", + "step": 3914, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002733456203714013, + "timestamp": "2025-09-10 02:24:06.570557", + "step": 3915, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:06.601457", + "step": 3915, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010088739916682243, + "timestamp": "2025-09-10 02:24:06.629030", + "step": 3916, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:06.659845", + "step": 3916, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005804885877296329, + "timestamp": "2025-09-10 02:24:06.664255", + "step": 3917, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:06.695717", + "step": 3917, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004878589534200728, + "timestamp": "2025-09-10 02:24:06.707563", + "step": 3918, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:06.738941", + "step": 3918, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038721126038581133, + "timestamp": "2025-09-10 02:24:06.751502", + "step": 3919, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:06.784020", + "step": 3919, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008839900838211179, + "timestamp": "2025-09-10 02:24:06.811653", + "step": 3920, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:06.843427", + "step": 3920, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010170344030484557, + "timestamp": "2025-09-10 02:24:06.845414", + "step": 3921, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:06.876709", + "step": 3921, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000293319666525349, + "timestamp": "2025-09-10 02:24:06.880635", + "step": 3922, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:06.911713", + "step": 3922, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001344728167168796, + "timestamp": "2025-09-10 02:24:06.918603", + "step": 3923, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:06.952454", + "step": 3923, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00218668463639915, + "timestamp": "2025-09-10 02:24:06.983868", + "step": 3924, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:07.014758", + "step": 3924, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008733495138585567, + "timestamp": "2025-09-10 02:24:07.019956", + "step": 3925, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:24:07.053619", + "step": 3925, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000639695324935019, + "timestamp": "2025-09-10 02:24:07.067069", + "step": 3926, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:07.098065", + "step": 3926, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011399161303415895, + "timestamp": "2025-09-10 02:24:07.100522", + "step": 3927, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:07.131720", + "step": 3927, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007331220549531281, + "timestamp": "2025-09-10 02:24:07.160224", + "step": 3928, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:24:07.192773", + "step": 3928, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00026727074873633683, + "timestamp": "2025-09-10 02:24:07.205833", + "step": 3929, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:07.237355", + "step": 3929, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014935116050764918, + "timestamp": "2025-09-10 02:24:07.249171", + "step": 3930, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:07.279958", + "step": 3930, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003637108893599361, + "timestamp": "2025-09-10 02:24:07.284267", + "step": 3931, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:07.315434", + "step": 3931, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00031519282492809, + "timestamp": "2025-09-10 02:24:07.340156", + "step": 3932, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:07.372778", + "step": 3932, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001294466550461948, + "timestamp": "2025-09-10 02:24:07.376862", + "step": 3933, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:07.408613", + "step": 3933, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013076617615297437, + "timestamp": "2025-09-10 02:24:07.412877", + "step": 3934, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:07.443775", + "step": 3934, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009365587611682713, + "timestamp": "2025-09-10 02:24:07.450806", + "step": 3935, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:24:07.481976", + "step": 3935, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000639063015114516, + "timestamp": "2025-09-10 02:24:07.506020", + "step": 3936, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:07.536530", + "step": 3936, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008496752008795738, + "timestamp": "2025-09-10 02:24:07.541316", + "step": 3937, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:07.572193", + "step": 3937, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006357203237712383, + "timestamp": "2025-09-10 02:24:07.579907", + "step": 3938, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:07.610407", + "step": 3938, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00021839377586729825, + "timestamp": "2025-09-10 02:24:07.617289", + "step": 3939, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:07.648094", + "step": 3939, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003433347155805677, + "timestamp": "2025-09-10 02:24:07.681085", + "step": 3940, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:07.712244", + "step": 3940, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004599998064804822, + "timestamp": "2025-09-10 02:24:07.716927", + "step": 3941, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:07.747344", + "step": 3941, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008438892662525177, + "timestamp": "2025-09-10 02:24:07.758360", + "step": 3942, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:07.789709", + "step": 3942, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013779483269900084, + "timestamp": "2025-09-10 02:24:07.802315", + "step": 3943, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:07.832842", + "step": 3943, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002914096985477954, + "timestamp": "2025-09-10 02:24:07.860587", + "step": 3944, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:07.894923", + "step": 3944, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012282740790396929, + "timestamp": "2025-09-10 02:24:07.902646", + "step": 3945, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:07.932838", + "step": 3945, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001276891416637227, + "timestamp": "2025-09-10 02:24:07.940473", + "step": 3946, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:07.970465", + "step": 3946, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006276755593717098, + "timestamp": "2025-09-10 02:24:07.981506", + "step": 3947, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:08.012106", + "step": 3947, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029240294825285673, + "timestamp": "2025-09-10 02:24:08.043281", + "step": 3948, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:08.073563", + "step": 3948, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004812986881006509, + "timestamp": "2025-09-10 02:24:08.079004", + "step": 3949, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:24:08.108621", + "step": 3949, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00047114197514019907, + "timestamp": "2025-09-10 02:24:08.111228", + "step": 3950, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:08.141972", + "step": 3950, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006882947636768222, + "timestamp": "2025-09-10 02:24:08.154144", + "step": 3951, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:08.184207", + "step": 3951, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003981906455010176, + "timestamp": "2025-09-10 02:24:08.212332", + "step": 3952, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:08.243261", + "step": 3952, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001539530057925731, + "timestamp": "2025-09-10 02:24:08.245687", + "step": 3953, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:08.276533", + "step": 3953, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001202300889417529, + "timestamp": "2025-09-10 02:24:08.286904", + "step": 3954, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:08.317017", + "step": 3954, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010202035773545504, + "timestamp": "2025-09-10 02:24:08.327013", + "step": 3955, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:08.357903", + "step": 3955, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005083515774458647, + "timestamp": "2025-09-10 02:24:08.386110", + "step": 3956, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:24:08.416923", + "step": 3956, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00014192526577971876, + "timestamp": "2025-09-10 02:24:08.429592", + "step": 3957, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:08.458783", + "step": 3957, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0236373208463192, + "timestamp": "2025-09-10 02:24:08.463031", + "step": 3958, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:08.493098", + "step": 3958, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004410702269524336, + "timestamp": "2025-09-10 02:24:08.505336", + "step": 3959, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:08.536639", + "step": 3959, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011517350794747472, + "timestamp": "2025-09-10 02:24:08.565282", + "step": 3960, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:08.596232", + "step": 3960, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006834971136413515, + "timestamp": "2025-09-10 02:24:08.606704", + "step": 3961, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:08.637610", + "step": 3961, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00012764804705511779, + "timestamp": "2025-09-10 02:24:08.644426", + "step": 3962, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:08.678508", + "step": 3962, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006874548853375018, + "timestamp": "2025-09-10 02:24:08.685403", + "step": 3963, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:08.716274", + "step": 3963, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035995282232761383, + "timestamp": "2025-09-10 02:24:08.744389", + "step": 3964, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:08.777692", + "step": 3964, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014470146968960762, + "timestamp": "2025-09-10 02:24:08.782058", + "step": 3965, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:08.813486", + "step": 3965, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.019030440598726273, + "timestamp": "2025-09-10 02:24:08.817956", + "step": 3966, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:08.848437", + "step": 3966, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006014609825797379, + "timestamp": "2025-09-10 02:24:08.855440", + "step": 3967, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:08.887013", + "step": 3967, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009079644805751741, + "timestamp": "2025-09-10 02:24:08.915256", + "step": 3968, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:08.946655", + "step": 3968, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007088962593115866, + "timestamp": "2025-09-10 02:24:08.951854", + "step": 3969, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:24:19.403820", + "step": 3969, + "epoch": 2 + }, + { + "type": "pplx", + "content": 22799844.439538065, + "timestamp": "2025-09-10 02:24:19.408402", + "step": 3969, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:24:19.442139", + "step": 3969, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003047510690521449, + "timestamp": "2025-09-10 02:24:19.455828", + "step": 3970, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:24:19.495658", + "step": 3970, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005057158879935741, + "timestamp": "2025-09-10 02:24:19.511583", + "step": 3971, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:19.544202", + "step": 3971, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008785208687186241, + "timestamp": "2025-09-10 02:24:19.572213", + "step": 3972, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:19.610162", + "step": 3972, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006338917650282383, + "timestamp": "2025-09-10 02:24:19.617256", + "step": 3973, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:19.648959", + "step": 3973, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006893486715853214, + "timestamp": "2025-09-10 02:24:19.655825", + "step": 3974, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:19.692474", + "step": 3974, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003333700296934694, + "timestamp": "2025-09-10 02:24:19.697017", + "step": 3975, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:19.727698", + "step": 3975, + "epoch": 2 + }, + { + "type": "loss", + "content": 8.453882037429139e-05, + "timestamp": "2025-09-10 02:24:19.760764", + "step": 3976, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:19.791106", + "step": 3976, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000572329037822783, + "timestamp": "2025-09-10 02:24:19.795706", + "step": 3977, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:19.826261", + "step": 3977, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02135993354022503, + "timestamp": "2025-09-10 02:24:19.834036", + "step": 3978, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:19.864596", + "step": 3978, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009581172838807106, + "timestamp": "2025-09-10 02:24:19.877140", + "step": 3979, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:19.907721", + "step": 3979, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009216717444360256, + "timestamp": "2025-09-10 02:24:19.936432", + "step": 3980, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:19.965471", + "step": 3980, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003136309387627989, + "timestamp": "2025-09-10 02:24:19.970967", + "step": 3981, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:20.001248", + "step": 3981, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002166020916774869, + "timestamp": "2025-09-10 02:24:20.008799", + "step": 3982, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:20.044066", + "step": 3982, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010525870602577925, + "timestamp": "2025-09-10 02:24:20.056313", + "step": 3983, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:20.086574", + "step": 3983, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007074028253555298, + "timestamp": "2025-09-10 02:24:20.114464", + "step": 3984, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:20.149163", + "step": 3984, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017534593120217323, + "timestamp": "2025-09-10 02:24:20.151241", + "step": 3985, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:20.182337", + "step": 3985, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005388972931541502, + "timestamp": "2025-09-10 02:24:20.189384", + "step": 3986, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:20.219925", + "step": 3986, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020015867426991463, + "timestamp": "2025-09-10 02:24:20.230202", + "step": 3987, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:20.265735", + "step": 3987, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004713798116426915, + "timestamp": "2025-09-10 02:24:20.290709", + "step": 3988, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:20.323084", + "step": 3988, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0058451988734304905, + "timestamp": "2025-09-10 02:24:20.330695", + "step": 3989, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:20.363663", + "step": 3989, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026393314823508263, + "timestamp": "2025-09-10 02:24:20.370585", + "step": 3990, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:20.413538", + "step": 3990, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007539827493019402, + "timestamp": "2025-09-10 02:24:20.423882", + "step": 3991, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:20.470114", + "step": 3991, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013442503288388252, + "timestamp": "2025-09-10 02:24:20.495116", + "step": 3992, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:20.526812", + "step": 3992, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014537216629832983, + "timestamp": "2025-09-10 02:24:20.529200", + "step": 3993, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:20.560254", + "step": 3993, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008240799652412534, + "timestamp": "2025-09-10 02:24:20.568128", + "step": 3994, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:20.599170", + "step": 3994, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015933796530589461, + "timestamp": "2025-09-10 02:24:20.603231", + "step": 3995, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:20.633342", + "step": 3995, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03594691678881645, + "timestamp": "2025-09-10 02:24:20.658890", + "step": 3996, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:20.689281", + "step": 3996, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000319391256198287, + "timestamp": "2025-09-10 02:24:20.694555", + "step": 3997, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:20.724910", + "step": 3997, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003305670979898423, + "timestamp": "2025-09-10 02:24:20.735146", + "step": 3998, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:20.764227", + "step": 3998, + "epoch": 2 + }, + { + "type": "loss", + "content": 8.796909969532862e-05, + "timestamp": "2025-09-10 02:24:20.771237", + "step": 3999, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:20.800926", + "step": 3999, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00020751934789586812, + "timestamp": "2025-09-10 02:24:20.833887", + "step": 4000, + "epoch": 2 + }, + { + "type": "info", + "content": "Checkpoint saved at step 4000", + "timestamp": "2025-09-10 02:24:25.468790", + "step": 4000, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:25.512847", + "step": 4000, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006269075674936175, + "timestamp": "2025-09-10 02:24:25.516078", + "step": 4001, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:25.549501", + "step": 4001, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00019980034267064184, + "timestamp": "2025-09-10 02:24:25.555522", + "step": 4002, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:25.589132", + "step": 4002, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003651339327916503, + "timestamp": "2025-09-10 02:24:25.595903", + "step": 4003, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:25.628240", + "step": 4003, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007770135416649282, + "timestamp": "2025-09-10 02:24:25.656161", + "step": 4004, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:24:25.690389", + "step": 4004, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008123559528030455, + "timestamp": "2025-09-10 02:24:25.703360", + "step": 4005, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:25.737137", + "step": 4005, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01008316408842802, + "timestamp": "2025-09-10 02:24:25.741004", + "step": 4006, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:25.773651", + "step": 4006, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004857528023421764, + "timestamp": "2025-09-10 02:24:25.781095", + "step": 4007, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:25.814355", + "step": 4007, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003992785350419581, + "timestamp": "2025-09-10 02:24:25.842407", + "step": 4008, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:25.876193", + "step": 4008, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000981758115813136, + "timestamp": "2025-09-10 02:24:25.881199", + "step": 4009, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:24:25.921274", + "step": 4009, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010118504287675023, + "timestamp": "2025-09-10 02:24:25.936833", + "step": 4010, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:25.979672", + "step": 4010, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013079562922939658, + "timestamp": "2025-09-10 02:24:25.986995", + "step": 4011, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:26.022946", + "step": 4011, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016620881855487823, + "timestamp": "2025-09-10 02:24:26.050856", + "step": 4012, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:26.083657", + "step": 4012, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05415716394782066, + "timestamp": "2025-09-10 02:24:26.087661", + "step": 4013, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:24:26.128422", + "step": 4013, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009211709839291871, + "timestamp": "2025-09-10 02:24:26.144033", + "step": 4014, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:24:26.187373", + "step": 4014, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0036754843313246965, + "timestamp": "2025-09-10 02:24:26.204433", + "step": 4015, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:26.238695", + "step": 4015, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007402479532174766, + "timestamp": "2025-09-10 02:24:26.267096", + "step": 4016, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:26.298194", + "step": 4016, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003799795522354543, + "timestamp": "2025-09-10 02:24:26.302393", + "step": 4017, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:26.335985", + "step": 4017, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001108443975681439, + "timestamp": "2025-09-10 02:24:26.348368", + "step": 4018, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:26.385532", + "step": 4018, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007017719559371471, + "timestamp": "2025-09-10 02:24:26.398073", + "step": 4019, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 14712978242368 + }, + "timestamp": "2025-09-10 02:24:26.444594", + "step": 4019, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021222028881311417, + "timestamp": "2025-09-10 02:24:26.483027", + "step": 4020, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:26.516785", + "step": 4020, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004138918302487582, + "timestamp": "2025-09-10 02:24:26.526789", + "step": 4021, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:26.559474", + "step": 4021, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011343798600137234, + "timestamp": "2025-09-10 02:24:26.566101", + "step": 4022, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:26.600887", + "step": 4022, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007483073975890875, + "timestamp": "2025-09-10 02:24:26.604769", + "step": 4023, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:26.638117", + "step": 4023, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003005236154422164, + "timestamp": "2025-09-10 02:24:26.662996", + "step": 4024, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:26.695391", + "step": 4024, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001247288309969008, + "timestamp": "2025-09-10 02:24:26.705059", + "step": 4025, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:26.738860", + "step": 4025, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006834504660218954, + "timestamp": "2025-09-10 02:24:26.749292", + "step": 4026, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:26.785078", + "step": 4026, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013955286704003811, + "timestamp": "2025-09-10 02:24:26.791860", + "step": 4027, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:26.825667", + "step": 4027, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021858804393559694, + "timestamp": "2025-09-10 02:24:26.850910", + "step": 4028, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:26.883376", + "step": 4028, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00010486682003829628, + "timestamp": "2025-09-10 02:24:26.888564", + "step": 4029, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:26.924331", + "step": 4029, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00023303573834709823, + "timestamp": "2025-09-10 02:24:26.931664", + "step": 4030, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:26.971160", + "step": 4030, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00035887552076019347, + "timestamp": "2025-09-10 02:24:26.981146", + "step": 4031, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:27.012992", + "step": 4031, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004720363358501345, + "timestamp": "2025-09-10 02:24:27.040899", + "step": 4032, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:27.074417", + "step": 4032, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.031031426042318344, + "timestamp": "2025-09-10 02:24:27.083836", + "step": 4033, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:27.116891", + "step": 4033, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011731393169611692, + "timestamp": "2025-09-10 02:24:27.123439", + "step": 4034, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:27.156461", + "step": 4034, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004593496269080788, + "timestamp": "2025-09-10 02:24:27.158888", + "step": 4035, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:27.194989", + "step": 4035, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005191961769014597, + "timestamp": "2025-09-10 02:24:27.226408", + "step": 4036, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:24:27.267895", + "step": 4036, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007114148465916514, + "timestamp": "2025-09-10 02:24:27.283581", + "step": 4037, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:27.317423", + "step": 4037, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00016225686704274267, + "timestamp": "2025-09-10 02:24:27.324612", + "step": 4038, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:24:27.362907", + "step": 4038, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00895176362246275, + "timestamp": "2025-09-10 02:24:27.376886", + "step": 4039, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:27.413372", + "step": 4039, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012197830947116017, + "timestamp": "2025-09-10 02:24:27.438493", + "step": 4040, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:24:27.475483", + "step": 4040, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00033447827445343137, + "timestamp": "2025-09-10 02:24:27.488606", + "step": 4041, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:27.521034", + "step": 4041, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001729366136714816, + "timestamp": "2025-09-10 02:24:27.527787", + "step": 4042, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:27.560417", + "step": 4042, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01877027377486229, + "timestamp": "2025-09-10 02:24:27.567838", + "step": 4043, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:27.600247", + "step": 4043, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001404429494868964, + "timestamp": "2025-09-10 02:24:27.627831", + "step": 4044, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:24:27.663723", + "step": 4044, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027306651696562767, + "timestamp": "2025-09-10 02:24:27.675805", + "step": 4045, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:27.711995", + "step": 4045, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00820981990545988, + "timestamp": "2025-09-10 02:24:27.723866", + "step": 4046, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:27.757447", + "step": 4046, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00019994494505226612, + "timestamp": "2025-09-10 02:24:27.767386", + "step": 4047, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:27.800142", + "step": 4047, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016157986829057336, + "timestamp": "2025-09-10 02:24:27.825392", + "step": 4048, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:27.857196", + "step": 4048, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032153644133359194, + "timestamp": "2025-09-10 02:24:27.862081", + "step": 4049, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:27.895807", + "step": 4049, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009131658589467406, + "timestamp": "2025-09-10 02:24:27.902771", + "step": 4050, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:27.938617", + "step": 4050, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002615003613755107, + "timestamp": "2025-09-10 02:24:27.941114", + "step": 4051, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:27.972303", + "step": 4051, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024026173632591963, + "timestamp": "2025-09-10 02:24:28.003897", + "step": 4052, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:28.037982", + "step": 4052, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002667165535967797, + "timestamp": "2025-09-10 02:24:28.045276", + "step": 4053, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:28.078604", + "step": 4053, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019048672402277589, + "timestamp": "2025-09-10 02:24:28.090526", + "step": 4054, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:28.122157", + "step": 4054, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007782382308505476, + "timestamp": "2025-09-10 02:24:28.129961", + "step": 4055, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:24:28.166964", + "step": 4055, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01114829070866108, + "timestamp": "2025-09-10 02:24:28.201501", + "step": 4056, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:24:28.245463", + "step": 4056, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023074380587786436, + "timestamp": "2025-09-10 02:24:28.260862", + "step": 4057, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:28.293857", + "step": 4057, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002955834148451686, + "timestamp": "2025-09-10 02:24:28.301123", + "step": 4058, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:28.333365", + "step": 4058, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003895343979820609, + "timestamp": "2025-09-10 02:24:28.337305", + "step": 4059, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:24:28.373184", + "step": 4059, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008619307540357113, + "timestamp": "2025-09-10 02:24:28.407381", + "step": 4060, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:28.441969", + "step": 4060, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016522839665412903, + "timestamp": "2025-09-10 02:24:28.449041", + "step": 4061, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:28.483243", + "step": 4061, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006908263312652707, + "timestamp": "2025-09-10 02:24:28.492993", + "step": 4062, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:28.527778", + "step": 4062, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022160657681524754, + "timestamp": "2025-09-10 02:24:28.534885", + "step": 4063, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:28.567164", + "step": 4063, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00819767639040947, + "timestamp": "2025-09-10 02:24:28.594498", + "step": 4064, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:28.629854", + "step": 4064, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025234988424926996, + "timestamp": "2025-09-10 02:24:28.639696", + "step": 4065, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:28.671547", + "step": 4065, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017039499944075942, + "timestamp": "2025-09-10 02:24:28.678953", + "step": 4066, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:28.709858", + "step": 4066, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0045456611551344395, + "timestamp": "2025-09-10 02:24:28.720134", + "step": 4067, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:28.752404", + "step": 4067, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05136652663350105, + "timestamp": "2025-09-10 02:24:28.780690", + "step": 4068, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:28.811401", + "step": 4068, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038302938919514418, + "timestamp": "2025-09-10 02:24:28.816041", + "step": 4069, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:24:28.851839", + "step": 4069, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0043130056001245975, + "timestamp": "2025-09-10 02:24:28.865507", + "step": 4070, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:28.896166", + "step": 4070, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004200483672320843, + "timestamp": "2025-09-10 02:24:28.903238", + "step": 4071, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:28.935274", + "step": 4071, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009043649188242853, + "timestamp": "2025-09-10 02:24:28.959569", + "step": 4072, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:28.991351", + "step": 4072, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001501325867138803, + "timestamp": "2025-09-10 02:24:28.996915", + "step": 4073, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:29.028148", + "step": 4073, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002941501443274319, + "timestamp": "2025-09-10 02:24:29.030693", + "step": 4074, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 15662185694400 + }, + "timestamp": "2025-09-10 02:24:29.076492", + "step": 4074, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001308751991018653, + "timestamp": "2025-09-10 02:24:29.095675", + "step": 4075, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:29.125589", + "step": 4075, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014235320268198848, + "timestamp": "2025-09-10 02:24:29.153553", + "step": 4076, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:29.186944", + "step": 4076, + "epoch": 2 + }, + { + "type": "loss", + "content": 5.163023524801247e-05, + "timestamp": "2025-09-10 02:24:29.197445", + "step": 4077, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:24:29.229513", + "step": 4077, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0026386440731585026, + "timestamp": "2025-09-10 02:24:29.231295", + "step": 4078, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:29.262111", + "step": 4078, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006315871723927557, + "timestamp": "2025-09-10 02:24:29.272626", + "step": 4079, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:29.304174", + "step": 4079, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019018551101908088, + "timestamp": "2025-09-10 02:24:29.329256", + "step": 4080, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:24:29.363347", + "step": 4080, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014254730194807053, + "timestamp": "2025-09-10 02:24:29.376636", + "step": 4081, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:29.408930", + "step": 4081, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012064416892826557, + "timestamp": "2025-09-10 02:24:29.418881", + "step": 4082, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:29.451248", + "step": 4082, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006986396852880716, + "timestamp": "2025-09-10 02:24:29.458324", + "step": 4083, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:24:29.495905", + "step": 4083, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00028711804770864546, + "timestamp": "2025-09-10 02:24:29.530484", + "step": 4084, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:29.562704", + "step": 4084, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.054849933832883835, + "timestamp": "2025-09-10 02:24:29.567305", + "step": 4085, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:29.596976", + "step": 4085, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015255759470164776, + "timestamp": "2025-09-10 02:24:29.603589", + "step": 4086, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:29.634971", + "step": 4086, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00023272530233953148, + "timestamp": "2025-09-10 02:24:29.637246", + "step": 4087, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:29.668012", + "step": 4087, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014455585042014718, + "timestamp": "2025-09-10 02:24:29.691670", + "step": 4088, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:29.722453", + "step": 4088, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.08093362301588058, + "timestamp": "2025-09-10 02:24:29.726769", + "step": 4089, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:24:29.766804", + "step": 4089, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05044776201248169, + "timestamp": "2025-09-10 02:24:29.782688", + "step": 4090, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:29.814916", + "step": 4090, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004336885642260313, + "timestamp": "2025-09-10 02:24:29.818884", + "step": 4091, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:29.850230", + "step": 4091, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012839260511100292, + "timestamp": "2025-09-10 02:24:29.878529", + "step": 4092, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:29.910753", + "step": 4092, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022947373799979687, + "timestamp": "2025-09-10 02:24:29.915492", + "step": 4093, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:29.945821", + "step": 4093, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00331043335609138, + "timestamp": "2025-09-10 02:24:29.949679", + "step": 4094, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:29.982228", + "step": 4094, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018772233743220568, + "timestamp": "2025-09-10 02:24:29.988899", + "step": 4095, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:24:30.022329", + "step": 4095, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015633044531568885, + "timestamp": "2025-09-10 02:24:30.056674", + "step": 4096, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:30.087499", + "step": 4096, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007612540386617184, + "timestamp": "2025-09-10 02:24:30.092839", + "step": 4097, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:30.124846", + "step": 4097, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009734017075970769, + "timestamp": "2025-09-10 02:24:30.132521", + "step": 4098, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:30.167935", + "step": 4098, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005414964980445802, + "timestamp": "2025-09-10 02:24:30.172166", + "step": 4099, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 18035204324480 + }, + "timestamp": "2025-09-10 02:24:30.224308", + "step": 4099, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003065018681809306, + "timestamp": "2025-09-10 02:24:30.266654", + "step": 4100, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:30.299544", + "step": 4100, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015219785273075104, + "timestamp": "2025-09-10 02:24:30.301834", + "step": 4101, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:30.335591", + "step": 4101, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007919540512375534, + "timestamp": "2025-09-10 02:24:30.346183", + "step": 4102, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:30.382001", + "step": 4102, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004758323193527758, + "timestamp": "2025-09-10 02:24:30.385670", + "step": 4103, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:30.420886", + "step": 4103, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001412588288076222, + "timestamp": "2025-09-10 02:24:30.445924", + "step": 4104, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:30.479410", + "step": 4104, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007647694437764585, + "timestamp": "2025-09-10 02:24:30.481760", + "step": 4105, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:30.513060", + "step": 4105, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00031270290492102504, + "timestamp": "2025-09-10 02:24:30.515597", + "step": 4106, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:30.547183", + "step": 4106, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012159907491877675, + "timestamp": "2025-09-10 02:24:30.549834", + "step": 4107, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 13289167064320 + }, + "timestamp": "2025-09-10 02:24:30.590714", + "step": 4107, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001639689551666379, + "timestamp": "2025-09-10 02:24:30.627898", + "step": 4108, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:24:30.661074", + "step": 4108, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0044628409668803215, + "timestamp": "2025-09-10 02:24:30.674061", + "step": 4109, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:30.705416", + "step": 4109, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015483727911487222, + "timestamp": "2025-09-10 02:24:30.715405", + "step": 4110, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:30.746478", + "step": 4110, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012858943082392216, + "timestamp": "2025-09-10 02:24:30.756300", + "step": 4111, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:30.788159", + "step": 4111, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005279368488118052, + "timestamp": "2025-09-10 02:24:30.816741", + "step": 4112, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:30.848252", + "step": 4112, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001972366590052843, + "timestamp": "2025-09-10 02:24:30.852943", + "step": 4113, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:30.884524", + "step": 4113, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004011372511740774, + "timestamp": "2025-09-10 02:24:30.891184", + "step": 4114, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:30.922970", + "step": 4114, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024985368363559246, + "timestamp": "2025-09-10 02:24:30.926771", + "step": 4115, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:30.957507", + "step": 4115, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012376006925478578, + "timestamp": "2025-09-10 02:24:30.982801", + "step": 4116, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:24:41.499678", + "step": 4116, + "epoch": 2 + }, + { + "type": "pplx", + "content": 19906806.935294818, + "timestamp": "2025-09-10 02:24:41.520920", + "step": 4116, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:41.580625", + "step": 4116, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007578267832286656, + "timestamp": "2025-09-10 02:24:41.597958", + "step": 4117, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:41.653595", + "step": 4117, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012618020409718156, + "timestamp": "2025-09-10 02:24:41.657564", + "step": 4118, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:41.698161", + "step": 4118, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005286371451802552, + "timestamp": "2025-09-10 02:24:41.710022", + "step": 4119, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:41.789569", + "step": 4119, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003075662301853299, + "timestamp": "2025-09-10 02:24:41.817807", + "step": 4120, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:41.903531", + "step": 4120, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006497717113234103, + "timestamp": "2025-09-10 02:24:41.921573", + "step": 4121, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:42.001125", + "step": 4121, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038573991041630507, + "timestamp": "2025-09-10 02:24:42.018713", + "step": 4122, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:42.096979", + "step": 4122, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003996575251221657, + "timestamp": "2025-09-10 02:24:42.105445", + "step": 4123, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:42.150019", + "step": 4123, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023050843738019466, + "timestamp": "2025-09-10 02:24:42.180754", + "step": 4124, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:42.215093", + "step": 4124, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015655980678275228, + "timestamp": "2025-09-10 02:24:42.218567", + "step": 4125, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:42.250888", + "step": 4125, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003783722873777151, + "timestamp": "2025-09-10 02:24:42.258287", + "step": 4126, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:42.292363", + "step": 4126, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000846231181640178, + "timestamp": "2025-09-10 02:24:42.298745", + "step": 4127, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:42.330802", + "step": 4127, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018645375967025757, + "timestamp": "2025-09-10 02:24:42.358824", + "step": 4128, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:42.391712", + "step": 4128, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006940880557522178, + "timestamp": "2025-09-10 02:24:42.395966", + "step": 4129, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:42.428546", + "step": 4129, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001143784262239933, + "timestamp": "2025-09-10 02:24:42.439938", + "step": 4130, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:42.472405", + "step": 4130, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018830805784091353, + "timestamp": "2025-09-10 02:24:42.478601", + "step": 4131, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:42.510561", + "step": 4131, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007161656394600868, + "timestamp": "2025-09-10 02:24:42.539023", + "step": 4132, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:42.570242", + "step": 4132, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023870845325291157, + "timestamp": "2025-09-10 02:24:42.574528", + "step": 4133, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:24:42.607954", + "step": 4133, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01680189184844494, + "timestamp": "2025-09-10 02:24:42.621289", + "step": 4134, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:42.652852", + "step": 4134, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025117939803749323, + "timestamp": "2025-09-10 02:24:42.663506", + "step": 4135, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:42.694081", + "step": 4135, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01693909242749214, + "timestamp": "2025-09-10 02:24:42.717805", + "step": 4136, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:42.749274", + "step": 4136, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008668032241985202, + "timestamp": "2025-09-10 02:24:42.757944", + "step": 4137, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:24:42.794806", + "step": 4137, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012921657180413604, + "timestamp": "2025-09-10 02:24:42.808844", + "step": 4138, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:42.841435", + "step": 4138, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010435592848807573, + "timestamp": "2025-09-10 02:24:42.851930", + "step": 4139, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:42.883699", + "step": 4139, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005313398782163858, + "timestamp": "2025-09-10 02:24:42.908438", + "step": 4140, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:24:42.945631", + "step": 4140, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009342093952000141, + "timestamp": "2025-09-10 02:24:42.960811", + "step": 4141, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:42.992178", + "step": 4141, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005205129855312407, + "timestamp": "2025-09-10 02:24:43.003346", + "step": 4142, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:43.033595", + "step": 4142, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024474586825817823, + "timestamp": "2025-09-10 02:24:43.036392", + "step": 4143, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:43.067860", + "step": 4143, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00045305112143978477, + "timestamp": "2025-09-10 02:24:43.095054", + "step": 4144, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:24:43.125624", + "step": 4144, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028814957477152348, + "timestamp": "2025-09-10 02:24:43.127838", + "step": 4145, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:43.158326", + "step": 4145, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000609158945735544, + "timestamp": "2025-09-10 02:24:43.165151", + "step": 4146, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:43.195363", + "step": 4146, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028953743167221546, + "timestamp": "2025-09-10 02:24:43.199436", + "step": 4147, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:24:43.237345", + "step": 4147, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0039150347001850605, + "timestamp": "2025-09-10 02:24:43.273804", + "step": 4148, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:43.304696", + "step": 4148, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001472481875680387, + "timestamp": "2025-09-10 02:24:43.312523", + "step": 4149, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:24:43.348228", + "step": 4149, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0046607027761638165, + "timestamp": "2025-09-10 02:24:43.362171", + "step": 4150, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:43.392806", + "step": 4150, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005251821596175432, + "timestamp": "2025-09-10 02:24:43.399769", + "step": 4151, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:43.431584", + "step": 4151, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006825706223025918, + "timestamp": "2025-09-10 02:24:43.456710", + "step": 4152, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:43.486943", + "step": 4152, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008997195400297642, + "timestamp": "2025-09-10 02:24:43.489487", + "step": 4153, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:43.520251", + "step": 4153, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00021450709027703851, + "timestamp": "2025-09-10 02:24:43.524736", + "step": 4154, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:43.556031", + "step": 4154, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032714589033275843, + "timestamp": "2025-09-10 02:24:43.563521", + "step": 4155, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:43.598714", + "step": 4155, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018134496640414, + "timestamp": "2025-09-10 02:24:43.626447", + "step": 4156, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:43.661551", + "step": 4156, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035523748956620693, + "timestamp": "2025-09-10 02:24:43.671258", + "step": 4157, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:24:43.711589", + "step": 4157, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026274150237441063, + "timestamp": "2025-09-10 02:24:43.724961", + "step": 4158, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:24:43.759358", + "step": 4158, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005512969219125807, + "timestamp": "2025-09-10 02:24:43.772658", + "step": 4159, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:43.803614", + "step": 4159, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017312598356511444, + "timestamp": "2025-09-10 02:24:43.827260", + "step": 4160, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:43.857429", + "step": 4160, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010247546015307307, + "timestamp": "2025-09-10 02:24:43.860571", + "step": 4161, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:43.890887", + "step": 4161, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02129758708178997, + "timestamp": "2025-09-10 02:24:43.895411", + "step": 4162, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:43.929362", + "step": 4162, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00029126249137334526, + "timestamp": "2025-09-10 02:24:43.938239", + "step": 4163, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:24:43.981123", + "step": 4163, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000328995258314535, + "timestamp": "2025-09-10 02:24:44.015235", + "step": 4164, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:44.045898", + "step": 4164, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00022505922242999077, + "timestamp": "2025-09-10 02:24:44.047935", + "step": 4165, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:44.079952", + "step": 4165, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010929142590612173, + "timestamp": "2025-09-10 02:24:44.090664", + "step": 4166, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:44.122893", + "step": 4166, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000381884427042678, + "timestamp": "2025-09-10 02:24:44.132234", + "step": 4167, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:44.164070", + "step": 4167, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008006826043128967, + "timestamp": "2025-09-10 02:24:44.188101", + "step": 4168, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:44.219173", + "step": 4168, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003847822081297636, + "timestamp": "2025-09-10 02:24:44.228953", + "step": 4169, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:44.259640", + "step": 4169, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002151952590793371, + "timestamp": "2025-09-10 02:24:44.266362", + "step": 4170, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:44.296706", + "step": 4170, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007263789302669466, + "timestamp": "2025-09-10 02:24:44.306932", + "step": 4171, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:44.337993", + "step": 4171, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014661334455013275, + "timestamp": "2025-09-10 02:24:44.370694", + "step": 4172, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:24:44.401986", + "step": 4172, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001283987076021731, + "timestamp": "2025-09-10 02:24:44.407582", + "step": 4173, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:24:44.442184", + "step": 4173, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033758750651031733, + "timestamp": "2025-09-10 02:24:44.455922", + "step": 4174, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:44.499085", + "step": 4174, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006651729927398264, + "timestamp": "2025-09-10 02:24:44.508438", + "step": 4175, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:44.552371", + "step": 4175, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022680295631289482, + "timestamp": "2025-09-10 02:24:44.579998", + "step": 4176, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:44.621199", + "step": 4176, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005751411896198988, + "timestamp": "2025-09-10 02:24:44.629067", + "step": 4177, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:44.675082", + "step": 4177, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00028175374609418213, + "timestamp": "2025-09-10 02:24:44.681997", + "step": 4178, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:24:44.728441", + "step": 4178, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00025079899933189154, + "timestamp": "2025-09-10 02:24:44.742394", + "step": 4179, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:44.781232", + "step": 4179, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002579400083050132, + "timestamp": "2025-09-10 02:24:44.808965", + "step": 4180, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:44.848373", + "step": 4180, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009939942974597216, + "timestamp": "2025-09-10 02:24:44.858729", + "step": 4181, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:44.892789", + "step": 4181, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00024698293418623507, + "timestamp": "2025-09-10 02:24:44.899796", + "step": 4182, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:44.933836", + "step": 4182, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015613092109560966, + "timestamp": "2025-09-10 02:24:44.941548", + "step": 4183, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:44.974017", + "step": 4183, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022488494869321585, + "timestamp": "2025-09-10 02:24:45.001793", + "step": 4184, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:24:45.035497", + "step": 4184, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015916310949251056, + "timestamp": "2025-09-10 02:24:45.037589", + "step": 4185, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:45.068522", + "step": 4185, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012442750157788396, + "timestamp": "2025-09-10 02:24:45.075681", + "step": 4186, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:45.107142", + "step": 4186, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00012972517288289964, + "timestamp": "2025-09-10 02:24:45.117217", + "step": 4187, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:45.148182", + "step": 4187, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00011532863572938368, + "timestamp": "2025-09-10 02:24:45.175919", + "step": 4188, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:45.207610", + "step": 4188, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004722306621260941, + "timestamp": "2025-09-10 02:24:45.209939", + "step": 4189, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:45.240550", + "step": 4189, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006129414541646838, + "timestamp": "2025-09-10 02:24:45.244713", + "step": 4190, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:45.277997", + "step": 4190, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016751401126384735, + "timestamp": "2025-09-10 02:24:45.285710", + "step": 4191, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:45.317813", + "step": 4191, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004313217068556696, + "timestamp": "2025-09-10 02:24:45.346485", + "step": 4192, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:45.379053", + "step": 4192, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0681036114692688, + "timestamp": "2025-09-10 02:24:45.387056", + "step": 4193, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:45.422109", + "step": 4193, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021573789417743683, + "timestamp": "2025-09-10 02:24:45.429574", + "step": 4194, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:45.462679", + "step": 4194, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00015258920029737055, + "timestamp": "2025-09-10 02:24:45.474668", + "step": 4195, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:45.505846", + "step": 4195, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0043427967466413975, + "timestamp": "2025-09-10 02:24:45.533466", + "step": 4196, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:45.564991", + "step": 4196, + "epoch": 2 + }, + { + "type": "loss", + "content": 6.901784217916429e-05, + "timestamp": "2025-09-10 02:24:45.574286", + "step": 4197, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:45.605121", + "step": 4197, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006661687511950731, + "timestamp": "2025-09-10 02:24:45.609203", + "step": 4198, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:45.642559", + "step": 4198, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00025688271853141487, + "timestamp": "2025-09-10 02:24:45.650157", + "step": 4199, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:45.681105", + "step": 4199, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010232685133814812, + "timestamp": "2025-09-10 02:24:45.709890", + "step": 4200, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:45.743640", + "step": 4200, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017913201823830605, + "timestamp": "2025-09-10 02:24:45.749224", + "step": 4201, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:45.783408", + "step": 4201, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000634572294075042, + "timestamp": "2025-09-10 02:24:45.790563", + "step": 4202, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:45.823229", + "step": 4202, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001113194739446044, + "timestamp": "2025-09-10 02:24:45.830989", + "step": 4203, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:45.862907", + "step": 4203, + "epoch": 2 + }, + { + "type": "loss", + "content": 4.078313577338122e-05, + "timestamp": "2025-09-10 02:24:45.894873", + "step": 4204, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:24:45.927700", + "step": 4204, + "epoch": 2 + }, + { + "type": "loss", + "content": 7.009686669334769e-05, + "timestamp": "2025-09-10 02:24:45.940748", + "step": 4205, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:45.971903", + "step": 4205, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013100274838507175, + "timestamp": "2025-09-10 02:24:45.978878", + "step": 4206, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:46.010401", + "step": 4206, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.016267575323581696, + "timestamp": "2025-09-10 02:24:46.017892", + "step": 4207, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:46.049063", + "step": 4207, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00022457198065239936, + "timestamp": "2025-09-10 02:24:46.077377", + "step": 4208, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:24:46.115376", + "step": 4208, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004984191036783159, + "timestamp": "2025-09-10 02:24:46.130853", + "step": 4209, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 848 + ], + "flops": 25154260214720 + }, + "timestamp": "2025-09-10 02:24:46.202347", + "step": 4209, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003745494468603283, + "timestamp": "2025-09-10 02:24:46.231818", + "step": 4210, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:46.263380", + "step": 4210, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00043498026207089424, + "timestamp": "2025-09-10 02:24:46.275916", + "step": 4211, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:46.306859", + "step": 4211, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00025909944088198245, + "timestamp": "2025-09-10 02:24:46.331627", + "step": 4212, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:46.362621", + "step": 4212, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015551492106169462, + "timestamp": "2025-09-10 02:24:46.365636", + "step": 4213, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:46.396632", + "step": 4213, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015605135122314095, + "timestamp": "2025-09-10 02:24:46.406815", + "step": 4214, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:46.440241", + "step": 4214, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020354004576802254, + "timestamp": "2025-09-10 02:24:46.447335", + "step": 4215, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:24:46.481626", + "step": 4215, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.033257655799388885, + "timestamp": "2025-09-10 02:24:46.516215", + "step": 4216, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:24:46.549841", + "step": 4216, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032047501299530268, + "timestamp": "2025-09-10 02:24:46.563141", + "step": 4217, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:46.595764", + "step": 4217, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013879657490178943, + "timestamp": "2025-09-10 02:24:46.600157", + "step": 4218, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 15187581968384 + }, + "timestamp": "2025-09-10 02:24:46.643464", + "step": 4218, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00234273006208241, + "timestamp": "2025-09-10 02:24:46.661172", + "step": 4219, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:24:46.702414", + "step": 4219, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005795766483061016, + "timestamp": "2025-09-10 02:24:46.739479", + "step": 4220, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:46.770239", + "step": 4220, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004290399665478617, + "timestamp": "2025-09-10 02:24:46.772483", + "step": 4221, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:46.803784", + "step": 4221, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017445342615246773, + "timestamp": "2025-09-10 02:24:46.816337", + "step": 4222, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:46.847245", + "step": 4222, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006286733550950885, + "timestamp": "2025-09-10 02:24:46.855283", + "step": 4223, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:46.886289", + "step": 4223, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007370785460807383, + "timestamp": "2025-09-10 02:24:46.918074", + "step": 4224, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:46.948655", + "step": 4224, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004575326107442379, + "timestamp": "2025-09-10 02:24:46.957270", + "step": 4225, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:46.988637", + "step": 4225, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020077417138963938, + "timestamp": "2025-09-10 02:24:46.995633", + "step": 4226, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:24:47.032864", + "step": 4226, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001171283540315926, + "timestamp": "2025-09-10 02:24:47.046226", + "step": 4227, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:47.077882", + "step": 4227, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00035053075407631695, + "timestamp": "2025-09-10 02:24:47.105616", + "step": 4228, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:47.136916", + "step": 4228, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004195565707050264, + "timestamp": "2025-09-10 02:24:47.144823", + "step": 4229, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:47.176989", + "step": 4229, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0031623467803001404, + "timestamp": "2025-09-10 02:24:47.183791", + "step": 4230, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:24:47.215231", + "step": 4230, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00927420798689127, + "timestamp": "2025-09-10 02:24:47.217461", + "step": 4231, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:24:47.256876", + "step": 4231, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00882531888782978, + "timestamp": "2025-09-10 02:24:47.293361", + "step": 4232, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:47.324758", + "step": 4232, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013175641652196646, + "timestamp": "2025-09-10 02:24:47.327064", + "step": 4233, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:47.360596", + "step": 4233, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02007768489420414, + "timestamp": "2025-09-10 02:24:47.373186", + "step": 4234, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:47.405075", + "step": 4234, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014163470827043056, + "timestamp": "2025-09-10 02:24:47.415328", + "step": 4235, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:47.446112", + "step": 4235, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02005593292415142, + "timestamp": "2025-09-10 02:24:47.474100", + "step": 4236, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:24:47.510982", + "step": 4236, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009125882061198354, + "timestamp": "2025-09-10 02:24:47.526175", + "step": 4237, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:47.557015", + "step": 4237, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01142832636833191, + "timestamp": "2025-09-10 02:24:47.564834", + "step": 4238, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:47.595788", + "step": 4238, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018545877188444138, + "timestamp": "2025-09-10 02:24:47.603091", + "step": 4239, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:24:47.637638", + "step": 4239, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013059125049039721, + "timestamp": "2025-09-10 02:24:47.672262", + "step": 4240, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:47.704270", + "step": 4240, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013210356701165438, + "timestamp": "2025-09-10 02:24:47.708942", + "step": 4241, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:47.739973", + "step": 4241, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014471757225692272, + "timestamp": "2025-09-10 02:24:47.746657", + "step": 4242, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:47.778557", + "step": 4242, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006982952821999788, + "timestamp": "2025-09-10 02:24:47.791126", + "step": 4243, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:47.822714", + "step": 4243, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017880608793348074, + "timestamp": "2025-09-10 02:24:47.850346", + "step": 4244, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:47.880878", + "step": 4244, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005929925944656134, + "timestamp": "2025-09-10 02:24:47.886066", + "step": 4245, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:47.917504", + "step": 4245, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004261931870132685, + "timestamp": "2025-09-10 02:24:47.925253", + "step": 4246, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:47.955408", + "step": 4246, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011088837636634707, + "timestamp": "2025-09-10 02:24:47.962565", + "step": 4247, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:48.013895", + "step": 4247, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009494653902947903, + "timestamp": "2025-09-10 02:24:48.042624", + "step": 4248, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:48.072576", + "step": 4248, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014409434515982866, + "timestamp": "2025-09-10 02:24:48.077188", + "step": 4249, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:48.109097", + "step": 4249, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007126140990294516, + "timestamp": "2025-09-10 02:24:48.116845", + "step": 4250, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 19459015502528 + }, + "timestamp": "2025-09-10 02:24:48.171651", + "step": 4250, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010598188964650035, + "timestamp": "2025-09-10 02:24:48.195085", + "step": 4251, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:24:48.230399", + "step": 4251, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014534889487549663, + "timestamp": "2025-09-10 02:24:48.261522", + "step": 4252, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:48.293468", + "step": 4252, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003932244435418397, + "timestamp": "2025-09-10 02:24:48.297886", + "step": 4253, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:24:48.336730", + "step": 4253, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024409524630755186, + "timestamp": "2025-09-10 02:24:48.352588", + "step": 4254, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:48.383837", + "step": 4254, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00022083787189330906, + "timestamp": "2025-09-10 02:24:48.390767", + "step": 4255, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:48.423234", + "step": 4255, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004927542991936207, + "timestamp": "2025-09-10 02:24:48.451762", + "step": 4256, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:48.484907", + "step": 4256, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016823627520352602, + "timestamp": "2025-09-10 02:24:48.494669", + "step": 4257, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:24:48.530118", + "step": 4257, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00010247322643408552, + "timestamp": "2025-09-10 02:24:48.543899", + "step": 4258, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:48.575265", + "step": 4258, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012629638658836484, + "timestamp": "2025-09-10 02:24:48.587583", + "step": 4259, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:48.620934", + "step": 4259, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002863981993868947, + "timestamp": "2025-09-10 02:24:48.652075", + "step": 4260, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:24:48.685050", + "step": 4260, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00011709488171618432, + "timestamp": "2025-09-10 02:24:48.698047", + "step": 4261, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:48.728803", + "step": 4261, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035543248523026705, + "timestamp": "2025-09-10 02:24:48.736430", + "step": 4262, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:24:48.768966", + "step": 4262, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005866262363269925, + "timestamp": "2025-09-10 02:24:48.773009", + "step": 4263, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:24:58.849819", + "step": 4263, + "epoch": 2 + }, + { + "type": "pplx", + "content": 22181812.0487706, + "timestamp": "2025-09-10 02:24:58.852988", + "step": 4263, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:58.883995", + "step": 4263, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003768012975342572, + "timestamp": "2025-09-10 02:24:58.916781", + "step": 4264, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:58.950995", + "step": 4264, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006254100706428289, + "timestamp": "2025-09-10 02:24:58.958279", + "step": 4265, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:24:58.989706", + "step": 4265, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002495805674698204, + "timestamp": "2025-09-10 02:24:58.992194", + "step": 4266, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:59.024254", + "step": 4266, + "epoch": 2 + }, + { + "type": "loss", + "content": 6.783670687582344e-05, + "timestamp": "2025-09-10 02:24:59.030786", + "step": 4267, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:59.062701", + "step": 4267, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00031304662115871906, + "timestamp": "2025-09-10 02:24:59.093777", + "step": 4268, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:24:59.126070", + "step": 4268, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011215128470212221, + "timestamp": "2025-09-10 02:24:59.138732", + "step": 4269, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:59.171123", + "step": 4269, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00010794185072882101, + "timestamp": "2025-09-10 02:24:59.179029", + "step": 4270, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:59.209624", + "step": 4270, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01281669456511736, + "timestamp": "2025-09-10 02:24:59.213646", + "step": 4271, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:59.245430", + "step": 4271, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017699485761113465, + "timestamp": "2025-09-10 02:24:59.273229", + "step": 4272, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:24:59.307346", + "step": 4272, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007482998538762331, + "timestamp": "2025-09-10 02:24:59.320324", + "step": 4273, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:59.351732", + "step": 4273, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002870141062885523, + "timestamp": "2025-09-10 02:24:59.355889", + "step": 4274, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:24:59.386629", + "step": 4274, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00027059766580350697, + "timestamp": "2025-09-10 02:24:59.393689", + "step": 4275, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:59.424586", + "step": 4275, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001303361786995083, + "timestamp": "2025-09-10 02:24:59.455849", + "step": 4276, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:24:59.487777", + "step": 4276, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001203405117848888, + "timestamp": "2025-09-10 02:24:59.492191", + "step": 4277, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:24:59.523578", + "step": 4277, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019344912143424153, + "timestamp": "2025-09-10 02:24:59.531227", + "step": 4278, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:24:59.568424", + "step": 4278, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00048476256779395044, + "timestamp": "2025-09-10 02:24:59.582376", + "step": 4279, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:24:59.613549", + "step": 4279, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00010040303459390998, + "timestamp": "2025-09-10 02:24:59.641773", + "step": 4280, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:24:59.673425", + "step": 4280, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00014088333409745246, + "timestamp": "2025-09-10 02:24:59.675811", + "step": 4281, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:59.707021", + "step": 4281, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00018304158584214747, + "timestamp": "2025-09-10 02:24:59.719352", + "step": 4282, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:59.751347", + "step": 4282, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002209401864092797, + "timestamp": "2025-09-10 02:24:59.755552", + "step": 4283, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:59.787338", + "step": 4283, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04991947486996651, + "timestamp": "2025-09-10 02:24:59.812420", + "step": 4284, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:24:59.843487", + "step": 4284, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009700055816210806, + "timestamp": "2025-09-10 02:24:59.845657", + "step": 4285, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:24:59.877995", + "step": 4285, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005855086492374539, + "timestamp": "2025-09-10 02:24:59.890131", + "step": 4286, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:24:59.921077", + "step": 4286, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.032629940658807755, + "timestamp": "2025-09-10 02:24:59.933614", + "step": 4287, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:24:59.964627", + "step": 4287, + "epoch": 2 + }, + { + "type": "loss", + "content": 8.535251981811598e-05, + "timestamp": "2025-09-10 02:24:59.995921", + "step": 4288, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:00.027327", + "step": 4288, + "epoch": 2 + }, + { + "type": "loss", + "content": 9.947276703314856e-05, + "timestamp": "2025-09-10 02:25:00.035463", + "step": 4289, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:00.071823", + "step": 4289, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00045155364205129445, + "timestamp": "2025-09-10 02:25:00.082136", + "step": 4290, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:00.113172", + "step": 4290, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010928146075457335, + "timestamp": "2025-09-10 02:25:00.123319", + "step": 4291, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:00.154266", + "step": 4291, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020004166290163994, + "timestamp": "2025-09-10 02:25:00.182271", + "step": 4292, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:00.213350", + "step": 4292, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00102779152803123, + "timestamp": "2025-09-10 02:25:00.218681", + "step": 4293, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:00.250280", + "step": 4293, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00010686110181268305, + "timestamp": "2025-09-10 02:25:00.262661", + "step": 4294, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:00.292792", + "step": 4294, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00020121461420785636, + "timestamp": "2025-09-10 02:25:00.305178", + "step": 4295, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:00.335235", + "step": 4295, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013605983695015311, + "timestamp": "2025-09-10 02:25:00.363064", + "step": 4296, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:00.393824", + "step": 4296, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013821868924424052, + "timestamp": "2025-09-10 02:25:00.398980", + "step": 4297, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:00.430139", + "step": 4297, + "epoch": 2 + }, + { + "type": "loss", + "content": 3.0794344638707116e-05, + "timestamp": "2025-09-10 02:25:00.437218", + "step": 4298, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:00.468044", + "step": 4298, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004364358726888895, + "timestamp": "2025-09-10 02:25:00.475072", + "step": 4299, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:00.506555", + "step": 4299, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013004717184230685, + "timestamp": "2025-09-10 02:25:00.534297", + "step": 4300, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:00.564881", + "step": 4300, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002206193981692195, + "timestamp": "2025-09-10 02:25:00.572088", + "step": 4301, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:00.603449", + "step": 4301, + "epoch": 2 + }, + { + "type": "loss", + "content": 5.780989522463642e-05, + "timestamp": "2025-09-10 02:25:00.610376", + "step": 4302, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:00.641640", + "step": 4302, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0021686165127903223, + "timestamp": "2025-09-10 02:25:00.652197", + "step": 4303, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:00.685700", + "step": 4303, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004125793057028204, + "timestamp": "2025-09-10 02:25:00.718939", + "step": 4304, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:00.753236", + "step": 4304, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04362964630126953, + "timestamp": "2025-09-10 02:25:00.758456", + "step": 4305, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:00.791572", + "step": 4305, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00016738659178372473, + "timestamp": "2025-09-10 02:25:00.795703", + "step": 4306, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:00.826855", + "step": 4306, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00039224643842317164, + "timestamp": "2025-09-10 02:25:00.833827", + "step": 4307, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:00.864064", + "step": 4307, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002121599536621943, + "timestamp": "2025-09-10 02:25:00.889610", + "step": 4308, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:00.920451", + "step": 4308, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006208484992384911, + "timestamp": "2025-09-10 02:25:00.926077", + "step": 4309, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:00.957605", + "step": 4309, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001874407462310046, + "timestamp": "2025-09-10 02:25:00.967556", + "step": 4310, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:00.998813", + "step": 4310, + "epoch": 2 + }, + { + "type": "loss", + "content": 3.587496030377224e-05, + "timestamp": "2025-09-10 02:25:01.006283", + "step": 4311, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:01.037457", + "step": 4311, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001078314846381545, + "timestamp": "2025-09-10 02:25:01.068721", + "step": 4312, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:01.100766", + "step": 4312, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00012618736946024, + "timestamp": "2025-09-10 02:25:01.105848", + "step": 4313, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:25:01.141178", + "step": 4313, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020358415320515633, + "timestamp": "2025-09-10 02:25:01.154952", + "step": 4314, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:01.189177", + "step": 4314, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020916808862239122, + "timestamp": "2025-09-10 02:25:01.202463", + "step": 4315, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:01.233436", + "step": 4315, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00011025350977433845, + "timestamp": "2025-09-10 02:25:01.261997", + "step": 4316, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:01.294453", + "step": 4316, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00029134147916920483, + "timestamp": "2025-09-10 02:25:01.303664", + "step": 4317, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:01.334769", + "step": 4317, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00012424368469510227, + "timestamp": "2025-09-10 02:25:01.341911", + "step": 4318, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:01.373280", + "step": 4318, + "epoch": 2 + }, + { + "type": "loss", + "content": 9.490887896390632e-05, + "timestamp": "2025-09-10 02:25:01.381056", + "step": 4319, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:01.412783", + "step": 4319, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00033820615499280393, + "timestamp": "2025-09-10 02:25:01.440434", + "step": 4320, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:01.472346", + "step": 4320, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010137018980458379, + "timestamp": "2025-09-10 02:25:01.479957", + "step": 4321, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:01.512010", + "step": 4321, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029211000073701143, + "timestamp": "2025-09-10 02:25:01.519422", + "step": 4322, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:01.553581", + "step": 4322, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003600471536628902, + "timestamp": "2025-09-10 02:25:01.557064", + "step": 4323, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:25:01.595108", + "step": 4323, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003258692449890077, + "timestamp": "2025-09-10 02:25:01.628523", + "step": 4324, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:01.665460", + "step": 4324, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00018436498066876084, + "timestamp": "2025-09-10 02:25:01.669802", + "step": 4325, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:01.706002", + "step": 4325, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00016462701023556292, + "timestamp": "2025-09-10 02:25:01.712863", + "step": 4326, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:01.751547", + "step": 4326, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00015942190657369792, + "timestamp": "2025-09-10 02:25:01.758328", + "step": 4327, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:01.797714", + "step": 4327, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009382445714436471, + "timestamp": "2025-09-10 02:25:01.821965", + "step": 4328, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:01.863480", + "step": 4328, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00022544125386048108, + "timestamp": "2025-09-10 02:25:01.873011", + "step": 4329, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:25:01.907683", + "step": 4329, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00011031327449018136, + "timestamp": "2025-09-10 02:25:01.911985", + "step": 4330, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:25:01.953697", + "step": 4330, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.014851606450974941, + "timestamp": "2025-09-10 02:25:01.967684", + "step": 4331, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:02.006933", + "step": 4331, + "epoch": 2 + }, + { + "type": "loss", + "content": 7.220734551083297e-05, + "timestamp": "2025-09-10 02:25:02.032416", + "step": 4332, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:02.065054", + "step": 4332, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00027742632664740086, + "timestamp": "2025-09-10 02:25:02.070467", + "step": 4333, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:25:02.110243", + "step": 4333, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00022008584346622229, + "timestamp": "2025-09-10 02:25:02.126339", + "step": 4334, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:02.157983", + "step": 4334, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02434348128736019, + "timestamp": "2025-09-10 02:25:02.165668", + "step": 4335, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:02.198239", + "step": 4335, + "epoch": 2 + }, + { + "type": "loss", + "content": 6.505424244096503e-05, + "timestamp": "2025-09-10 02:25:02.229114", + "step": 4336, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:02.260400", + "step": 4336, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028443282935768366, + "timestamp": "2025-09-10 02:25:02.265722", + "step": 4337, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:02.297700", + "step": 4337, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021456856280565262, + "timestamp": "2025-09-10 02:25:02.305252", + "step": 4338, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:02.336131", + "step": 4338, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00022894078574609011, + "timestamp": "2025-09-10 02:25:02.338752", + "step": 4339, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 13289167064320 + }, + "timestamp": "2025-09-10 02:25:02.381660", + "step": 4339, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002486018347553909, + "timestamp": "2025-09-10 02:25:02.418933", + "step": 4340, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:02.449597", + "step": 4340, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008721557445824146, + "timestamp": "2025-09-10 02:25:02.454124", + "step": 4341, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:02.484784", + "step": 4341, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025644836947321892, + "timestamp": "2025-09-10 02:25:02.492080", + "step": 4342, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:02.526057", + "step": 4342, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002075839089229703, + "timestamp": "2025-09-10 02:25:02.530195", + "step": 4343, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:02.561412", + "step": 4343, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0144526781514287, + "timestamp": "2025-09-10 02:25:02.588953", + "step": 4344, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:02.619914", + "step": 4344, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009567998349666595, + "timestamp": "2025-09-10 02:25:02.624449", + "step": 4345, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:02.658395", + "step": 4345, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002823833783622831, + "timestamp": "2025-09-10 02:25:02.665186", + "step": 4346, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:02.696805", + "step": 4346, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005126704927533865, + "timestamp": "2025-09-10 02:25:02.706668", + "step": 4347, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:02.737506", + "step": 4347, + "epoch": 2 + }, + { + "type": "loss", + "content": 6.73050744808279e-05, + "timestamp": "2025-09-10 02:25:02.765447", + "step": 4348, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:25:02.797360", + "step": 4348, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014881890965625644, + "timestamp": "2025-09-10 02:25:02.807494", + "step": 4349, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:02.838380", + "step": 4349, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008507216116413474, + "timestamp": "2025-09-10 02:25:02.842906", + "step": 4350, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:02.877739", + "step": 4350, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017134180234279484, + "timestamp": "2025-09-10 02:25:02.884535", + "step": 4351, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:02.915481", + "step": 4351, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008143614046275616, + "timestamp": "2025-09-10 02:25:02.943789", + "step": 4352, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:02.974733", + "step": 4352, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005178903229534626, + "timestamp": "2025-09-10 02:25:02.977062", + "step": 4353, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:03.010208", + "step": 4353, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012402004795148969, + "timestamp": "2025-09-10 02:25:03.018067", + "step": 4354, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:03.048968", + "step": 4354, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00033361284295096993, + "timestamp": "2025-09-10 02:25:03.059247", + "step": 4355, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:03.096653", + "step": 4355, + "epoch": 2 + }, + { + "type": "loss", + "content": 9.43372942856513e-05, + "timestamp": "2025-09-10 02:25:03.122014", + "step": 4356, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:03.152986", + "step": 4356, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010012110695242882, + "timestamp": "2025-09-10 02:25:03.162705", + "step": 4357, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:25:03.194345", + "step": 4357, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008857456268742681, + "timestamp": "2025-09-10 02:25:03.206853", + "step": 4358, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:03.238672", + "step": 4358, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003111858095508069, + "timestamp": "2025-09-10 02:25:03.242498", + "step": 4359, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:03.273121", + "step": 4359, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002316091413376853, + "timestamp": "2025-09-10 02:25:03.298351", + "step": 4360, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:03.329540", + "step": 4360, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009092413820326328, + "timestamp": "2025-09-10 02:25:03.334165", + "step": 4361, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:25:03.365369", + "step": 4361, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00018735427875071764, + "timestamp": "2025-09-10 02:25:03.377920", + "step": 4362, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:03.409909", + "step": 4362, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.026521209627389908, + "timestamp": "2025-09-10 02:25:03.416748", + "step": 4363, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:03.447157", + "step": 4363, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000702383928000927, + "timestamp": "2025-09-10 02:25:03.472725", + "step": 4364, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:03.504432", + "step": 4364, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02729278802871704, + "timestamp": "2025-09-10 02:25:03.513002", + "step": 4365, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:03.544765", + "step": 4365, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004027434333693236, + "timestamp": "2025-09-10 02:25:03.554960", + "step": 4366, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:03.585808", + "step": 4366, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002170759718865156, + "timestamp": "2025-09-10 02:25:03.593496", + "step": 4367, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:25:03.626118", + "step": 4367, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00022459625324700028, + "timestamp": "2025-09-10 02:25:03.650096", + "step": 4368, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:03.682870", + "step": 4368, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006497269496321678, + "timestamp": "2025-09-10 02:25:03.686874", + "step": 4369, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:03.718865", + "step": 4369, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004197689704596996, + "timestamp": "2025-09-10 02:25:03.725576", + "step": 4370, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:03.757710", + "step": 4370, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021389422938227654, + "timestamp": "2025-09-10 02:25:03.760473", + "step": 4371, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:03.791399", + "step": 4371, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005672698607668281, + "timestamp": "2025-09-10 02:25:03.816643", + "step": 4372, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:03.848327", + "step": 4372, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001765951863490045, + "timestamp": "2025-09-10 02:25:03.853585", + "step": 4373, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:03.884873", + "step": 4373, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00043063057819381356, + "timestamp": "2025-09-10 02:25:03.889252", + "step": 4374, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:03.921343", + "step": 4374, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00024490643409080803, + "timestamp": "2025-09-10 02:25:03.929220", + "step": 4375, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:03.960344", + "step": 4375, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008961300482042134, + "timestamp": "2025-09-10 02:25:03.985581", + "step": 4376, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:04.017214", + "step": 4376, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05845966935157776, + "timestamp": "2025-09-10 02:25:04.019595", + "step": 4377, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:04.050407", + "step": 4377, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025336632505059242, + "timestamp": "2025-09-10 02:25:04.061385", + "step": 4378, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:25:04.100938", + "step": 4378, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010996916331350803, + "timestamp": "2025-09-10 02:25:04.114689", + "step": 4379, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:25:04.149851", + "step": 4379, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00864367000758648, + "timestamp": "2025-09-10 02:25:04.184450", + "step": 4380, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:04.215617", + "step": 4380, + "epoch": 2 + }, + { + "type": "loss", + "content": 9.313374903285876e-05, + "timestamp": "2025-09-10 02:25:04.220604", + "step": 4381, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:25:04.254668", + "step": 4381, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00039597839349880815, + "timestamp": "2025-09-10 02:25:04.268392", + "step": 4382, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:04.308879", + "step": 4382, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007557374192401767, + "timestamp": "2025-09-10 02:25:04.316489", + "step": 4383, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:04.356029", + "step": 4383, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00034854214754886925, + "timestamp": "2025-09-10 02:25:04.384274", + "step": 4384, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:04.416262", + "step": 4384, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001689386263024062, + "timestamp": "2025-09-10 02:25:04.420840", + "step": 4385, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:04.451403", + "step": 4385, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00041699831490404904, + "timestamp": "2025-09-10 02:25:04.455584", + "step": 4386, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:04.486246", + "step": 4386, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.07199867814779282, + "timestamp": "2025-09-10 02:25:04.489002", + "step": 4387, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:04.520589", + "step": 4387, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04156893119215965, + "timestamp": "2025-09-10 02:25:04.549335", + "step": 4388, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:04.581471", + "step": 4388, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010044872760772705, + "timestamp": "2025-09-10 02:25:04.594236", + "step": 4389, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:04.627227", + "step": 4389, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009525059722363949, + "timestamp": "2025-09-10 02:25:04.634148", + "step": 4390, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:25:04.666518", + "step": 4390, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010987967252731323, + "timestamp": "2025-09-10 02:25:04.677857", + "step": 4391, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:04.710710", + "step": 4391, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00037763340515084565, + "timestamp": "2025-09-10 02:25:04.738121", + "step": 4392, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 15662185694400 + }, + "timestamp": "2025-09-10 02:25:04.782454", + "step": 4392, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009227042086422443, + "timestamp": "2025-09-10 02:25:04.801406", + "step": 4393, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:04.836946", + "step": 4393, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0056563569232821465, + "timestamp": "2025-09-10 02:25:04.843328", + "step": 4394, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:04.878055", + "step": 4394, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002920966362580657, + "timestamp": "2025-09-10 02:25:04.891381", + "step": 4395, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:04.923937", + "step": 4395, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005617746501229703, + "timestamp": "2025-09-10 02:25:04.951134", + "step": 4396, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:04.985495", + "step": 4396, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0088628139346838, + "timestamp": "2025-09-10 02:25:04.988524", + "step": 4397, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:05.022980", + "step": 4397, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002569663105532527, + "timestamp": "2025-09-10 02:25:05.028624", + "step": 4398, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:25:05.064404", + "step": 4398, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004381218459457159, + "timestamp": "2025-09-10 02:25:05.078266", + "step": 4399, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:05.110148", + "step": 4399, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00011449779412942007, + "timestamp": "2025-09-10 02:25:05.137669", + "step": 4400, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:25:05.175220", + "step": 4400, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009381847339682281, + "timestamp": "2025-09-10 02:25:05.190670", + "step": 4401, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:05.225844", + "step": 4401, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003086991375312209, + "timestamp": "2025-09-10 02:25:05.239188", + "step": 4402, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:05.270382", + "step": 4402, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008446700521744788, + "timestamp": "2025-09-10 02:25:05.274812", + "step": 4403, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:05.306126", + "step": 4403, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012834797613322735, + "timestamp": "2025-09-10 02:25:05.331405", + "step": 4404, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:05.362930", + "step": 4404, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002319957595318556, + "timestamp": "2025-09-10 02:25:05.370483", + "step": 4405, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:05.402303", + "step": 4405, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.021244987845420837, + "timestamp": "2025-09-10 02:25:05.406316", + "step": 4406, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:05.438704", + "step": 4406, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019213539781048894, + "timestamp": "2025-09-10 02:25:05.446034", + "step": 4407, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:05.479814", + "step": 4407, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02890808694064617, + "timestamp": "2025-09-10 02:25:05.514178", + "step": 4408, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:05.546653", + "step": 4408, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007281397935003042, + "timestamp": "2025-09-10 02:25:05.551380", + "step": 4409, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:05.583266", + "step": 4409, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00019025967048946768, + "timestamp": "2025-09-10 02:25:05.587500", + "step": 4410, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:25:15.850616", + "step": 4410, + "epoch": 2 + }, + { + "type": "pplx", + "content": 20370479.949023202, + "timestamp": "2025-09-10 02:25:15.854937", + "step": 4410, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:15.885941", + "step": 4410, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011351705761626363, + "timestamp": "2025-09-10 02:25:15.895670", + "step": 4411, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:15.930248", + "step": 4411, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018093172693625093, + "timestamp": "2025-09-10 02:25:15.953841", + "step": 4412, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:15.985315", + "step": 4412, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025083277374505997, + "timestamp": "2025-09-10 02:25:15.989623", + "step": 4413, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:16.019818", + "step": 4413, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019721267744898796, + "timestamp": "2025-09-10 02:25:16.032036", + "step": 4414, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:16.061635", + "step": 4414, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004431180714163929, + "timestamp": "2025-09-10 02:25:16.071615", + "step": 4415, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:16.102136", + "step": 4415, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006176200695335865, + "timestamp": "2025-09-10 02:25:16.133398", + "step": 4416, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:16.163904", + "step": 4416, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014073234051465988, + "timestamp": "2025-09-10 02:25:16.176495", + "step": 4417, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:16.208014", + "step": 4417, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0057088471949100494, + "timestamp": "2025-09-10 02:25:16.220069", + "step": 4418, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:16.250957", + "step": 4418, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012675122125074267, + "timestamp": "2025-09-10 02:25:16.257909", + "step": 4419, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:16.288247", + "step": 4419, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01437693927437067, + "timestamp": "2025-09-10 02:25:16.316874", + "step": 4420, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:16.347430", + "step": 4420, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025128854904323816, + "timestamp": "2025-09-10 02:25:16.352338", + "step": 4421, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:16.382941", + "step": 4421, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003963653929531574, + "timestamp": "2025-09-10 02:25:16.385672", + "step": 4422, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:16.416172", + "step": 4422, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020399346947669983, + "timestamp": "2025-09-10 02:25:16.423691", + "step": 4423, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:16.453886", + "step": 4423, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03138017654418945, + "timestamp": "2025-09-10 02:25:16.487035", + "step": 4424, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:16.517208", + "step": 4424, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019118200289085507, + "timestamp": "2025-09-10 02:25:16.526815", + "step": 4425, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:16.557895", + "step": 4425, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005137981381267309, + "timestamp": "2025-09-10 02:25:16.565386", + "step": 4426, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:16.596353", + "step": 4426, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001418622094206512, + "timestamp": "2025-09-10 02:25:16.607299", + "step": 4427, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:16.638091", + "step": 4427, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003645730612333864, + "timestamp": "2025-09-10 02:25:16.666574", + "step": 4428, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:16.696492", + "step": 4428, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00549284229055047, + "timestamp": "2025-09-10 02:25:16.701706", + "step": 4429, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:16.732318", + "step": 4429, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04728490859270096, + "timestamp": "2025-09-10 02:25:16.739376", + "step": 4430, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:16.769497", + "step": 4430, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001036238856613636, + "timestamp": "2025-09-10 02:25:16.781689", + "step": 4431, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:25:16.816547", + "step": 4431, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009465152397751808, + "timestamp": "2025-09-10 02:25:16.851174", + "step": 4432, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:25:16.881588", + "step": 4432, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012627379037439823, + "timestamp": "2025-09-10 02:25:16.883722", + "step": 4433, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:25:16.913932", + "step": 4433, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00041100315866060555, + "timestamp": "2025-09-10 02:25:16.916237", + "step": 4434, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:16.945630", + "step": 4434, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013999169459566474, + "timestamp": "2025-09-10 02:25:16.953387", + "step": 4435, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:16.983926", + "step": 4435, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02100582979619503, + "timestamp": "2025-09-10 02:25:17.012694", + "step": 4436, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 15187581968384 + }, + "timestamp": "2025-09-10 02:25:17.053341", + "step": 4436, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001676683546975255, + "timestamp": "2025-09-10 02:25:17.070677", + "step": 4437, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:17.101443", + "step": 4437, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023947085719555616, + "timestamp": "2025-09-10 02:25:17.109357", + "step": 4438, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:17.140639", + "step": 4438, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.027537260204553604, + "timestamp": "2025-09-10 02:25:17.147759", + "step": 4439, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:17.177732", + "step": 4439, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008474222384393215, + "timestamp": "2025-09-10 02:25:17.210732", + "step": 4440, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:17.240867", + "step": 4440, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005611742846667767, + "timestamp": "2025-09-10 02:25:17.243416", + "step": 4441, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:25:17.281556", + "step": 4441, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019225550349801779, + "timestamp": "2025-09-10 02:25:17.297179", + "step": 4442, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:17.329927", + "step": 4442, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001591675216332078, + "timestamp": "2025-09-10 02:25:17.336903", + "step": 4443, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:17.367487", + "step": 4443, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011540406849235296, + "timestamp": "2025-09-10 02:25:17.395419", + "step": 4444, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:25:17.431151", + "step": 4444, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0016605369746685028, + "timestamp": "2025-09-10 02:25:17.444438", + "step": 4445, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:17.474779", + "step": 4445, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.033832911401987076, + "timestamp": "2025-09-10 02:25:17.477354", + "step": 4446, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:25:17.507953", + "step": 4446, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02437257580459118, + "timestamp": "2025-09-10 02:25:17.520474", + "step": 4447, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:25:17.557930", + "step": 4447, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006624148809351027, + "timestamp": "2025-09-10 02:25:17.594424", + "step": 4448, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:17.626626", + "step": 4448, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005153920501470566, + "timestamp": "2025-09-10 02:25:17.628614", + "step": 4449, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:25:17.666898", + "step": 4449, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.025530895218253136, + "timestamp": "2025-09-10 02:25:17.682503", + "step": 4450, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:17.714804", + "step": 4450, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00903196632862091, + "timestamp": "2025-09-10 02:25:17.722651", + "step": 4451, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:17.756406", + "step": 4451, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018469911068677902, + "timestamp": "2025-09-10 02:25:17.788252", + "step": 4452, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:17.823443", + "step": 4452, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00648995628580451, + "timestamp": "2025-09-10 02:25:17.828496", + "step": 4453, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:17.859668", + "step": 4453, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0027806435246020555, + "timestamp": "2025-09-10 02:25:17.869766", + "step": 4454, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:17.899869", + "step": 4454, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024515967816114426, + "timestamp": "2025-09-10 02:25:17.906768", + "step": 4455, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:17.937126", + "step": 4455, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002551526122260839, + "timestamp": "2025-09-10 02:25:17.969578", + "step": 4456, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:17.999872", + "step": 4456, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003521733218804002, + "timestamp": "2025-09-10 02:25:18.008432", + "step": 4457, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:18.038618", + "step": 4457, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038238188717514277, + "timestamp": "2025-09-10 02:25:18.041246", + "step": 4458, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:18.071557", + "step": 4458, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001157488557510078, + "timestamp": "2025-09-10 02:25:18.076059", + "step": 4459, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:18.106247", + "step": 4459, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006657246965914965, + "timestamp": "2025-09-10 02:25:18.136622", + "step": 4460, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:18.169556", + "step": 4460, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0043445127084851265, + "timestamp": "2025-09-10 02:25:18.177946", + "step": 4461, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:18.212263", + "step": 4461, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006325058173388243, + "timestamp": "2025-09-10 02:25:18.225631", + "step": 4462, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:18.255544", + "step": 4462, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.031205790117383003, + "timestamp": "2025-09-10 02:25:18.260132", + "step": 4463, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:18.293387", + "step": 4463, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006542644929140806, + "timestamp": "2025-09-10 02:25:18.327670", + "step": 4464, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:18.358221", + "step": 4464, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034815913531929255, + "timestamp": "2025-09-10 02:25:18.360567", + "step": 4465, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:18.393096", + "step": 4465, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0044549135491251945, + "timestamp": "2025-09-10 02:25:18.403353", + "step": 4466, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:18.435701", + "step": 4466, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007206571288406849, + "timestamp": "2025-09-10 02:25:18.446597", + "step": 4467, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:18.479836", + "step": 4467, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019098568009212613, + "timestamp": "2025-09-10 02:25:18.514083", + "step": 4468, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:18.545584", + "step": 4468, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009369590319693089, + "timestamp": "2025-09-10 02:25:18.553335", + "step": 4469, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:18.585127", + "step": 4469, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004385852254927158, + "timestamp": "2025-09-10 02:25:18.592851", + "step": 4470, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:18.624213", + "step": 4470, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033103374298661947, + "timestamp": "2025-09-10 02:25:18.631120", + "step": 4471, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:18.663428", + "step": 4471, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.023561924695968628, + "timestamp": "2025-09-10 02:25:18.691155", + "step": 4472, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:18.722928", + "step": 4472, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012181418016552925, + "timestamp": "2025-09-10 02:25:18.728189", + "step": 4473, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:18.765187", + "step": 4473, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001010423176921904, + "timestamp": "2025-09-10 02:25:18.772216", + "step": 4474, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:18.805784", + "step": 4474, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.013591425493359566, + "timestamp": "2025-09-10 02:25:18.812254", + "step": 4475, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:25:18.849404", + "step": 4475, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007945683784782887, + "timestamp": "2025-09-10 02:25:18.873116", + "step": 4476, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:18.903332", + "step": 4476, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010526351630687714, + "timestamp": "2025-09-10 02:25:18.913240", + "step": 4477, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:18.950587", + "step": 4477, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0060272216796875, + "timestamp": "2025-09-10 02:25:18.954742", + "step": 4478, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:18.987285", + "step": 4478, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010436930693686008, + "timestamp": "2025-09-10 02:25:18.994660", + "step": 4479, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:19.025145", + "step": 4479, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002587387105450034, + "timestamp": "2025-09-10 02:25:19.056094", + "step": 4480, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:19.086029", + "step": 4480, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002534937346354127, + "timestamp": "2025-09-10 02:25:19.090628", + "step": 4481, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:19.123598", + "step": 4481, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03924372047185898, + "timestamp": "2025-09-10 02:25:19.134548", + "step": 4482, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:19.166117", + "step": 4482, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0028582927770912647, + "timestamp": "2025-09-10 02:25:19.173122", + "step": 4483, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:19.205766", + "step": 4483, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013124326942488551, + "timestamp": "2025-09-10 02:25:19.231252", + "step": 4484, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:19.262019", + "step": 4484, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000659207405988127, + "timestamp": "2025-09-10 02:25:19.269985", + "step": 4485, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:19.299946", + "step": 4485, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035707519855350256, + "timestamp": "2025-09-10 02:25:19.302674", + "step": 4486, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:19.333939", + "step": 4486, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003890756983309984, + "timestamp": "2025-09-10 02:25:19.338618", + "step": 4487, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:19.367790", + "step": 4487, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003235712181776762, + "timestamp": "2025-09-10 02:25:19.391384", + "step": 4488, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:19.421271", + "step": 4488, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025943939108401537, + "timestamp": "2025-09-10 02:25:19.429195", + "step": 4489, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:25:19.459709", + "step": 4489, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013656103983521461, + "timestamp": "2025-09-10 02:25:19.472263", + "step": 4490, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:19.502937", + "step": 4490, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037658896762877703, + "timestamp": "2025-09-10 02:25:19.509624", + "step": 4491, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:19.541567", + "step": 4491, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002117105294018984, + "timestamp": "2025-09-10 02:25:19.574046", + "step": 4492, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:25:19.609978", + "step": 4492, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034039022866636515, + "timestamp": "2025-09-10 02:25:19.625185", + "step": 4493, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:25:19.660066", + "step": 4493, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007604501210153103, + "timestamp": "2025-09-10 02:25:19.673916", + "step": 4494, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:19.706338", + "step": 4494, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011244564084336162, + "timestamp": "2025-09-10 02:25:19.713059", + "step": 4495, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:19.746550", + "step": 4495, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006800326984375715, + "timestamp": "2025-09-10 02:25:19.780819", + "step": 4496, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:19.813417", + "step": 4496, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01871911995112896, + "timestamp": "2025-09-10 02:25:19.818464", + "step": 4497, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:19.848580", + "step": 4497, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017440064111724496, + "timestamp": "2025-09-10 02:25:19.851210", + "step": 4498, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:19.881445", + "step": 4498, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011718474561348557, + "timestamp": "2025-09-10 02:25:19.891679", + "step": 4499, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:19.922466", + "step": 4499, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032657107803970575, + "timestamp": "2025-09-10 02:25:19.953441", + "step": 4500, + "epoch": 2 + }, + { + "type": "info", + "content": "Checkpoint saved at step 4500", + "timestamp": "2025-09-10 02:25:24.557607", + "step": 4500, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:24.600123", + "step": 4500, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034066697116941214, + "timestamp": "2025-09-10 02:25:24.610227", + "step": 4501, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:24.646491", + "step": 4501, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002542842412367463, + "timestamp": "2025-09-10 02:25:24.653568", + "step": 4502, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:25:24.685272", + "step": 4502, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002150564454495907, + "timestamp": "2025-09-10 02:25:24.697290", + "step": 4503, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:24.729074", + "step": 4503, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001930785016156733, + "timestamp": "2025-09-10 02:25:24.757013", + "step": 4504, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:24.789288", + "step": 4504, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003784495871514082, + "timestamp": "2025-09-10 02:25:24.793966", + "step": 4505, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:24.826430", + "step": 4505, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.010695603676140308, + "timestamp": "2025-09-10 02:25:24.833409", + "step": 4506, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:25:24.868356", + "step": 4506, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0040979208424687386, + "timestamp": "2025-09-10 02:25:24.870690", + "step": 4507, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:25:24.905472", + "step": 4507, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002046718029305339, + "timestamp": "2025-09-10 02:25:24.940320", + "step": 4508, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:24.972380", + "step": 4508, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0030817301012575626, + "timestamp": "2025-09-10 02:25:24.976751", + "step": 4509, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:25:25.009031", + "step": 4509, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035417492035776377, + "timestamp": "2025-09-10 02:25:25.021505", + "step": 4510, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:25.052846", + "step": 4510, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019691623747348785, + "timestamp": "2025-09-10 02:25:25.060224", + "step": 4511, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:25.092805", + "step": 4511, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0045999689027667046, + "timestamp": "2025-09-10 02:25:25.120632", + "step": 4512, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:25.152028", + "step": 4512, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011519378749653697, + "timestamp": "2025-09-10 02:25:25.156253", + "step": 4513, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:25:25.190703", + "step": 4513, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003534820629283786, + "timestamp": "2025-09-10 02:25:25.204538", + "step": 4514, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:25.235373", + "step": 4514, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012388339964672923, + "timestamp": "2025-09-10 02:25:25.242299", + "step": 4515, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:25.273546", + "step": 4515, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004160807467997074, + "timestamp": "2025-09-10 02:25:25.301250", + "step": 4516, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:25.333171", + "step": 4516, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.022341666743159294, + "timestamp": "2025-09-10 02:25:25.337743", + "step": 4517, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:25.370232", + "step": 4517, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011338494950905442, + "timestamp": "2025-09-10 02:25:25.381798", + "step": 4518, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:25.414671", + "step": 4518, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.015123574994504452, + "timestamp": "2025-09-10 02:25:25.421016", + "step": 4519, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:25.452284", + "step": 4519, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006013158708810806, + "timestamp": "2025-09-10 02:25:25.477041", + "step": 4520, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:25:25.510533", + "step": 4520, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004607627633959055, + "timestamp": "2025-09-10 02:25:25.523661", + "step": 4521, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:25.555730", + "step": 4521, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005228667287155986, + "timestamp": "2025-09-10 02:25:25.563370", + "step": 4522, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:25.594818", + "step": 4522, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018962175818160176, + "timestamp": "2025-09-10 02:25:25.604537", + "step": 4523, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:25.635292", + "step": 4523, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010046407114714384, + "timestamp": "2025-09-10 02:25:25.666066", + "step": 4524, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:25.698133", + "step": 4524, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004124719125684351, + "timestamp": "2025-09-10 02:25:25.705611", + "step": 4525, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:25.737235", + "step": 4525, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00014472127077169716, + "timestamp": "2025-09-10 02:25:25.741466", + "step": 4526, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:25.772457", + "step": 4526, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0033601843751966953, + "timestamp": "2025-09-10 02:25:25.778985", + "step": 4527, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:25.811525", + "step": 4527, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00247010076418519, + "timestamp": "2025-09-10 02:25:25.842144", + "step": 4528, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:25.873629", + "step": 4528, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.03199951723217964, + "timestamp": "2025-09-10 02:25:25.877737", + "step": 4529, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:25.911048", + "step": 4529, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004257894121110439, + "timestamp": "2025-09-10 02:25:25.918120", + "step": 4530, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:25.949885", + "step": 4530, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010970250004902482, + "timestamp": "2025-09-10 02:25:25.956494", + "step": 4531, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:25.988518", + "step": 4531, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010734976967796683, + "timestamp": "2025-09-10 02:25:26.019911", + "step": 4532, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:25:26.051703", + "step": 4532, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022549789864569902, + "timestamp": "2025-09-10 02:25:26.061138", + "step": 4533, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:26.092505", + "step": 4533, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005263431929051876, + "timestamp": "2025-09-10 02:25:26.095987", + "step": 4534, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:26.127450", + "step": 4534, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00026051278109662235, + "timestamp": "2025-09-10 02:25:26.130043", + "step": 4535, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:26.168166", + "step": 4535, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0022305804304778576, + "timestamp": "2025-09-10 02:25:26.191905", + "step": 4536, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:26.224731", + "step": 4536, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0032375783193856478, + "timestamp": "2025-09-10 02:25:26.237353", + "step": 4537, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:26.268987", + "step": 4537, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010004842188209295, + "timestamp": "2025-09-10 02:25:26.275711", + "step": 4538, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:26.308250", + "step": 4538, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000708259001839906, + "timestamp": "2025-09-10 02:25:26.315660", + "step": 4539, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:26.347929", + "step": 4539, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020569520071148872, + "timestamp": "2025-09-10 02:25:26.372621", + "step": 4540, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:26.405003", + "step": 4540, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.009725110605359077, + "timestamp": "2025-09-10 02:25:26.412294", + "step": 4541, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:26.443431", + "step": 4541, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003039458068087697, + "timestamp": "2025-09-10 02:25:26.447287", + "step": 4542, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:26.478145", + "step": 4542, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0049249157309532166, + "timestamp": "2025-09-10 02:25:26.482075", + "step": 4543, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:25:26.517540", + "step": 4543, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006834098021499813, + "timestamp": "2025-09-10 02:25:26.552245", + "step": 4544, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:26.583654", + "step": 4544, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00034481266629882157, + "timestamp": "2025-09-10 02:25:26.587683", + "step": 4545, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:26.629070", + "step": 4545, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006935402750968933, + "timestamp": "2025-09-10 02:25:26.633268", + "step": 4546, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:26.664858", + "step": 4546, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.006434209179133177, + "timestamp": "2025-09-10 02:25:26.672410", + "step": 4547, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:25:26.713922", + "step": 4547, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.01216146256774664, + "timestamp": "2025-09-10 02:25:26.751858", + "step": 4548, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:26.783543", + "step": 4548, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008372723124921322, + "timestamp": "2025-09-10 02:25:26.786885", + "step": 4549, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:26.819839", + "step": 4549, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00022385262127500027, + "timestamp": "2025-09-10 02:25:26.826651", + "step": 4550, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:25:26.858276", + "step": 4550, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.007002050522714853, + "timestamp": "2025-09-10 02:25:26.870536", + "step": 4551, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:26.903103", + "step": 4551, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005396933993324637, + "timestamp": "2025-09-10 02:25:26.931106", + "step": 4552, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:26.962944", + "step": 4552, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024629905819892883, + "timestamp": "2025-09-10 02:25:26.970969", + "step": 4553, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:27.002522", + "step": 4553, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00016223655256908387, + "timestamp": "2025-09-10 02:25:27.006138", + "step": 4554, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:27.037871", + "step": 4554, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0015956457937136292, + "timestamp": "2025-09-10 02:25:27.040497", + "step": 4555, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:25:27.071104", + "step": 4555, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005312658613547683, + "timestamp": "2025-09-10 02:25:27.095029", + "step": 4556, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:27.127004", + "step": 4556, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002805754542350769, + "timestamp": "2025-09-10 02:25:27.129456", + "step": 4557, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:25:37.248941", + "step": 4557, + "epoch": 2 + }, + { + "type": "pplx", + "content": 22196068.675676793, + "timestamp": "2025-09-10 02:25:37.251722", + "step": 4557, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:37.281897", + "step": 4557, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010733563685789704, + "timestamp": "2025-09-10 02:25:37.285638", + "step": 4558, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:37.317115", + "step": 4558, + "epoch": 2 + }, + { + "type": "loss", + "content": 5.339493145584129e-05, + "timestamp": "2025-09-10 02:25:37.324842", + "step": 4559, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:37.357123", + "step": 4559, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.012812472879886627, + "timestamp": "2025-09-10 02:25:37.384863", + "step": 4560, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:37.415764", + "step": 4560, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008561398833990097, + "timestamp": "2025-09-10 02:25:37.418114", + "step": 4561, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:37.449223", + "step": 4561, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00012836763926316053, + "timestamp": "2025-09-10 02:25:37.456292", + "step": 4562, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:37.486863", + "step": 4562, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001862238277681172, + "timestamp": "2025-09-10 02:25:37.499099", + "step": 4563, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:37.529385", + "step": 4563, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009596589952707291, + "timestamp": "2025-09-10 02:25:37.557360", + "step": 4564, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:37.588280", + "step": 4564, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013999083312228322, + "timestamp": "2025-09-10 02:25:37.596221", + "step": 4565, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:37.627323", + "step": 4565, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009914558613672853, + "timestamp": "2025-09-10 02:25:37.634287", + "step": 4566, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:37.664921", + "step": 4566, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002980300458148122, + "timestamp": "2025-09-10 02:25:37.672667", + "step": 4567, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:37.703443", + "step": 4567, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00020019218209199607, + "timestamp": "2025-09-10 02:25:37.732258", + "step": 4568, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:37.763055", + "step": 4568, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008754459558986127, + "timestamp": "2025-09-10 02:25:37.767499", + "step": 4569, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:37.797578", + "step": 4569, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008850363665260375, + "timestamp": "2025-09-10 02:25:37.804781", + "step": 4570, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:37.835457", + "step": 4570, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006145837833173573, + "timestamp": "2025-09-10 02:25:37.842706", + "step": 4571, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:37.873094", + "step": 4571, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006157992756925523, + "timestamp": "2025-09-10 02:25:37.904941", + "step": 4572, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:37.935999", + "step": 4572, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00951096136122942, + "timestamp": "2025-09-10 02:25:37.940852", + "step": 4573, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:37.972196", + "step": 4573, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.034846000373363495, + "timestamp": "2025-09-10 02:25:37.979654", + "step": 4574, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:38.011724", + "step": 4574, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007103482494130731, + "timestamp": "2025-09-10 02:25:38.015941", + "step": 4575, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:38.047396", + "step": 4575, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004654258373193443, + "timestamp": "2025-09-10 02:25:38.075283", + "step": 4576, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:38.109652", + "step": 4576, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002614832716062665, + "timestamp": "2025-09-10 02:25:38.118519", + "step": 4577, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:38.150260", + "step": 4577, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.017886726185679436, + "timestamp": "2025-09-10 02:25:38.157172", + "step": 4578, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:38.188687", + "step": 4578, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00023473672627005726, + "timestamp": "2025-09-10 02:25:38.195723", + "step": 4579, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:38.227708", + "step": 4579, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004447103419806808, + "timestamp": "2025-09-10 02:25:38.255265", + "step": 4580, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:38.288925", + "step": 4580, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0046869367361068726, + "timestamp": "2025-09-10 02:25:38.295757", + "step": 4581, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:38.327961", + "step": 4581, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004977317294105887, + "timestamp": "2025-09-10 02:25:38.337638", + "step": 4582, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:38.370544", + "step": 4582, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006395676755346358, + "timestamp": "2025-09-10 02:25:38.382421", + "step": 4583, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:38.413797", + "step": 4583, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0017181943403556943, + "timestamp": "2025-09-10 02:25:38.442673", + "step": 4584, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:38.474975", + "step": 4584, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017127768660429865, + "timestamp": "2025-09-10 02:25:38.479489", + "step": 4585, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:25:38.510902", + "step": 4585, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008789977291598916, + "timestamp": "2025-09-10 02:25:38.514803", + "step": 4586, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:38.546954", + "step": 4586, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000202530252863653, + "timestamp": "2025-09-10 02:25:38.553713", + "step": 4587, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:25:38.595580", + "step": 4587, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02567419223487377, + "timestamp": "2025-09-10 02:25:38.633726", + "step": 4588, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:38.665670", + "step": 4588, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0034661719109863043, + "timestamp": "2025-09-10 02:25:38.673159", + "step": 4589, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:38.705050", + "step": 4589, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012347496813163161, + "timestamp": "2025-09-10 02:25:38.712603", + "step": 4590, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:38.745491", + "step": 4590, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001653311337577179, + "timestamp": "2025-09-10 02:25:38.752873", + "step": 4591, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:38.784280", + "step": 4591, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006502936012111604, + "timestamp": "2025-09-10 02:25:38.809196", + "step": 4592, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:25:38.846360", + "step": 4592, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00027454280643723905, + "timestamp": "2025-09-10 02:25:38.861692", + "step": 4593, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:38.893021", + "step": 4593, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001545843406347558, + "timestamp": "2025-09-10 02:25:38.897115", + "step": 4594, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:38.927715", + "step": 4594, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.05455930903553963, + "timestamp": "2025-09-10 02:25:38.932332", + "step": 4595, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:38.964182", + "step": 4595, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00358793162740767, + "timestamp": "2025-09-10 02:25:38.992494", + "step": 4596, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:39.024115", + "step": 4596, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006846991600468755, + "timestamp": "2025-09-10 02:25:39.032247", + "step": 4597, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:39.065109", + "step": 4597, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000762683164793998, + "timestamp": "2025-09-10 02:25:39.072321", + "step": 4598, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:39.105738", + "step": 4598, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005580178694799542, + "timestamp": "2025-09-10 02:25:39.119148", + "step": 4599, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:25:39.149868", + "step": 4599, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.04828514903783798, + "timestamp": "2025-09-10 02:25:39.174906", + "step": 4600, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:39.212885", + "step": 4600, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0019080432830378413, + "timestamp": "2025-09-10 02:25:39.217720", + "step": 4601, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:39.249614", + "step": 4601, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00021091777307447046, + "timestamp": "2025-09-10 02:25:39.256791", + "step": 4602, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 14712978242368 + }, + "timestamp": "2025-09-10 02:25:39.300909", + "step": 4602, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009871380170807242, + "timestamp": "2025-09-10 02:25:39.318473", + "step": 4603, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:39.350082", + "step": 4603, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004607336886692792, + "timestamp": "2025-09-10 02:25:39.380362", + "step": 4604, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:39.411906", + "step": 4604, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006659630453214049, + "timestamp": "2025-09-10 02:25:39.419587", + "step": 4605, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:39.449968", + "step": 4605, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001328265992924571, + "timestamp": "2025-09-10 02:25:39.456704", + "step": 4606, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:25:39.487495", + "step": 4606, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017410985310561955, + "timestamp": "2025-09-10 02:25:39.490210", + "step": 4607, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:39.521017", + "step": 4607, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004943975363858044, + "timestamp": "2025-09-10 02:25:39.549176", + "step": 4608, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:25:39.587044", + "step": 4608, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005744884721934795, + "timestamp": "2025-09-10 02:25:39.602492", + "step": 4609, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:39.634627", + "step": 4609, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007691961363889277, + "timestamp": "2025-09-10 02:25:39.645597", + "step": 4610, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:39.676945", + "step": 4610, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00013593518815468997, + "timestamp": "2025-09-10 02:25:39.684017", + "step": 4611, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:39.715445", + "step": 4611, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0014134369557723403, + "timestamp": "2025-09-10 02:25:39.743812", + "step": 4612, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:39.775669", + "step": 4612, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0025131232105195522, + "timestamp": "2025-09-10 02:25:39.781144", + "step": 4613, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:25:39.815817", + "step": 4613, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002813456521835178, + "timestamp": "2025-09-10 02:25:39.829621", + "step": 4614, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:39.860677", + "step": 4614, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.003985443152487278, + "timestamp": "2025-09-10 02:25:39.868027", + "step": 4615, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:39.899801", + "step": 4615, + "epoch": 2 + }, + { + "type": "loss", + "content": 6.440089055104181e-05, + "timestamp": "2025-09-10 02:25:39.928044", + "step": 4616, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:39.958592", + "step": 4616, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004450418520718813, + "timestamp": "2025-09-10 02:25:39.963542", + "step": 4617, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:39.994245", + "step": 4617, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024432761128991842, + "timestamp": "2025-09-10 02:25:40.004305", + "step": 4618, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:40.034812", + "step": 4618, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.008134338073432446, + "timestamp": "2025-09-10 02:25:40.038920", + "step": 4619, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:25:40.076477", + "step": 4619, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0038712576497346163, + "timestamp": "2025-09-10 02:25:40.113034", + "step": 4620, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:40.143839", + "step": 4620, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00024131852842401713, + "timestamp": "2025-09-10 02:25:40.148586", + "step": 4621, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:40.179389", + "step": 4621, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002276496816193685, + "timestamp": "2025-09-10 02:25:40.190457", + "step": 4622, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:40.227228", + "step": 4622, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00045795520418323576, + "timestamp": "2025-09-10 02:25:40.240617", + "step": 4623, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:40.272382", + "step": 4623, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020052941981703043, + "timestamp": "2025-09-10 02:25:40.305646", + "step": 4624, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:40.336456", + "step": 4624, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003010353248100728, + "timestamp": "2025-09-10 02:25:40.341626", + "step": 4625, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:40.372635", + "step": 4625, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003616653848439455, + "timestamp": "2025-09-10 02:25:40.384970", + "step": 4626, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:40.415819", + "step": 4626, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009788486640900373, + "timestamp": "2025-09-10 02:25:40.422896", + "step": 4627, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:40.456481", + "step": 4627, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018540980527177453, + "timestamp": "2025-09-10 02:25:40.490768", + "step": 4628, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:25:40.522011", + "step": 4628, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00019449996761977673, + "timestamp": "2025-09-10 02:25:40.532556", + "step": 4629, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:40.563716", + "step": 4629, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001505575724877417, + "timestamp": "2025-09-10 02:25:40.570742", + "step": 4630, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:25:40.602398", + "step": 4630, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00010367255163146183, + "timestamp": "2025-09-10 02:25:40.604663", + "step": 4631, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:40.634796", + "step": 4631, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00021751046006102115, + "timestamp": "2025-09-10 02:25:40.658513", + "step": 4632, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:40.689780", + "step": 4632, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005528530455194414, + "timestamp": "2025-09-10 02:25:40.694627", + "step": 4633, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:40.725402", + "step": 4633, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006717692012898624, + "timestamp": "2025-09-10 02:25:40.732500", + "step": 4634, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:40.763952", + "step": 4634, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003762389242183417, + "timestamp": "2025-09-10 02:25:40.771598", + "step": 4635, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:40.803947", + "step": 4635, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006608268013224006, + "timestamp": "2025-09-10 02:25:40.832461", + "step": 4636, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:40.864000", + "step": 4636, + "epoch": 2 + }, + { + "type": "loss", + "content": 8.623411849839613e-05, + "timestamp": "2025-09-10 02:25:40.871941", + "step": 4637, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:40.903954", + "step": 4637, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006122788763605058, + "timestamp": "2025-09-10 02:25:40.910802", + "step": 4638, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:25:40.945948", + "step": 4638, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002174076158553362, + "timestamp": "2025-09-10 02:25:40.959955", + "step": 4639, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:25:40.995398", + "step": 4639, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002304811787325889, + "timestamp": "2025-09-10 02:25:41.030016", + "step": 4640, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:41.060128", + "step": 4640, + "epoch": 2 + }, + { + "type": "loss", + "content": 7.722365262452513e-05, + "timestamp": "2025-09-10 02:25:41.062407", + "step": 4641, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:25:41.096808", + "step": 4641, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00041160904220305383, + "timestamp": "2025-09-10 02:25:41.110676", + "step": 4642, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:41.141746", + "step": 4642, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0018459666753187776, + "timestamp": "2025-09-10 02:25:41.148706", + "step": 4643, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:41.180226", + "step": 4643, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011864164844155312, + "timestamp": "2025-09-10 02:25:41.208859", + "step": 4644, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:41.239694", + "step": 4644, + "epoch": 2 + }, + { + "type": "loss", + "content": 5.6243621656904e-05, + "timestamp": "2025-09-10 02:25:41.241544", + "step": 4645, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:41.272194", + "step": 4645, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.054455097764730453, + "timestamp": "2025-09-10 02:25:41.279208", + "step": 4646, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:41.309894", + "step": 4646, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0007867555250413716, + "timestamp": "2025-09-10 02:25:41.314022", + "step": 4647, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:25:41.346859", + "step": 4647, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00012387036986183375, + "timestamp": "2025-09-10 02:25:41.380311", + "step": 4648, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:41.413604", + "step": 4648, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00019573597819544375, + "timestamp": "2025-09-10 02:25:41.418594", + "step": 4649, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:41.450652", + "step": 4649, + "epoch": 2 + }, + { + "type": "loss", + "content": 7.041559729259461e-05, + "timestamp": "2025-09-10 02:25:41.458149", + "step": 4650, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:41.490198", + "step": 4650, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.02323429472744465, + "timestamp": "2025-09-10 02:25:41.500100", + "step": 4651, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:41.531296", + "step": 4651, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0013165498385205865, + "timestamp": "2025-09-10 02:25:41.562358", + "step": 4652, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:41.593334", + "step": 4652, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002063662832370028, + "timestamp": "2025-09-10 02:25:41.595699", + "step": 4653, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:25:41.625894", + "step": 4653, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.011784272268414497, + "timestamp": "2025-09-10 02:25:41.628252", + "step": 4654, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:41.660002", + "step": 4654, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0023810744751244783, + "timestamp": "2025-09-10 02:25:41.667569", + "step": 4655, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:41.697803", + "step": 4655, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0012222749646753073, + "timestamp": "2025-09-10 02:25:41.723222", + "step": 4656, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:41.753995", + "step": 4656, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006310855969786644, + "timestamp": "2025-09-10 02:25:41.762630", + "step": 4657, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:41.793181", + "step": 4657, + "epoch": 2 + }, + { + "type": "loss", + "content": 6.163497891975567e-05, + "timestamp": "2025-09-10 02:25:41.797539", + "step": 4658, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:41.829355", + "step": 4658, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00010095408651977777, + "timestamp": "2025-09-10 02:25:41.833453", + "step": 4659, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:41.865602", + "step": 4659, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00024290040892083198, + "timestamp": "2025-09-10 02:25:41.897535", + "step": 4660, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:41.929454", + "step": 4660, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009246188565157354, + "timestamp": "2025-09-10 02:25:41.942119", + "step": 4661, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:41.972950", + "step": 4661, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011847690911963582, + "timestamp": "2025-09-10 02:25:41.980351", + "step": 4662, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:25:42.019971", + "step": 4662, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0035353994462639093, + "timestamp": "2025-09-10 02:25:42.035617", + "step": 4663, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:42.066434", + "step": 4663, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010591925820335746, + "timestamp": "2025-09-10 02:25:42.095085", + "step": 4664, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:25:42.134536", + "step": 4664, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00923153292387724, + "timestamp": "2025-09-10 02:25:42.151529", + "step": 4665, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:42.182270", + "step": 4665, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.000438479648437351, + "timestamp": "2025-09-10 02:25:42.189395", + "step": 4666, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:42.226759", + "step": 4666, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.004933382850140333, + "timestamp": "2025-09-10 02:25:42.231094", + "step": 4667, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:42.262207", + "step": 4667, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.018754737451672554, + "timestamp": "2025-09-10 02:25:42.287551", + "step": 4668, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:42.319647", + "step": 4668, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001720808504614979, + "timestamp": "2025-09-10 02:25:42.324427", + "step": 4669, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:42.354995", + "step": 4669, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0011062477715313435, + "timestamp": "2025-09-10 02:25:42.362087", + "step": 4670, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:42.392755", + "step": 4670, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00019575886835809797, + "timestamp": "2025-09-10 02:25:42.403589", + "step": 4671, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:42.434530", + "step": 4671, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005415527615696192, + "timestamp": "2025-09-10 02:25:42.465694", + "step": 4672, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:42.497197", + "step": 4672, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010951546719297767, + "timestamp": "2025-09-10 02:25:42.501696", + "step": 4673, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:25:42.532796", + "step": 4673, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.002216489752754569, + "timestamp": "2025-09-10 02:25:42.535146", + "step": 4674, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:42.566942", + "step": 4674, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0029746759682893753, + "timestamp": "2025-09-10 02:25:42.571135", + "step": 4675, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:25:42.603076", + "step": 4675, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008459574310109019, + "timestamp": "2025-09-10 02:25:42.636460", + "step": 4676, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:42.667674", + "step": 4676, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005179825238883495, + "timestamp": "2025-09-10 02:25:42.669969", + "step": 4677, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:42.701536", + "step": 4677, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.020527558401226997, + "timestamp": "2025-09-10 02:25:42.708350", + "step": 4678, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:25:42.743444", + "step": 4678, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006391909555532038, + "timestamp": "2025-09-10 02:25:42.757207", + "step": 4679, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:25:42.791612", + "step": 4679, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017153627413790673, + "timestamp": "2025-09-10 02:25:42.826189", + "step": 4680, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:42.857263", + "step": 4680, + "epoch": 2 + }, + { + "type": "loss", + "content": 6.232234591152519e-05, + "timestamp": "2025-09-10 02:25:42.862050", + "step": 4681, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:42.893169", + "step": 4681, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010045451344922185, + "timestamp": "2025-09-10 02:25:42.904132", + "step": 4682, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:42.934949", + "step": 4682, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005457932711578906, + "timestamp": "2025-09-10 02:25:42.945212", + "step": 4683, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:25:42.980176", + "step": 4683, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005815924378111959, + "timestamp": "2025-09-10 02:25:43.015032", + "step": 4684, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:25:43.054138", + "step": 4684, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005409900564700365, + "timestamp": "2025-09-10 02:25:43.070846", + "step": 4685, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:43.102700", + "step": 4685, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00027891225181519985, + "timestamp": "2025-09-10 02:25:43.107089", + "step": 4686, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:43.137992", + "step": 4686, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00238403445109725, + "timestamp": "2025-09-10 02:25:43.142416", + "step": 4687, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:43.175928", + "step": 4687, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003628423437476158, + "timestamp": "2025-09-10 02:25:43.210185", + "step": 4688, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:43.240934", + "step": 4688, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005772776901721954, + "timestamp": "2025-09-10 02:25:43.248672", + "step": 4689, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:43.279895", + "step": 4689, + "epoch": 2 + }, + { + "type": "loss", + "content": 5.581247023656033e-05, + "timestamp": "2025-09-10 02:25:43.287182", + "step": 4690, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:43.318782", + "step": 4690, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001327107078395784, + "timestamp": "2025-09-10 02:25:43.331000", + "step": 4691, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:43.361819", + "step": 4691, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.039219219237565994, + "timestamp": "2025-09-10 02:25:43.386848", + "step": 4692, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:43.420335", + "step": 4692, + "epoch": 2 + }, + { + "type": "loss", + "content": 8.427041029790416e-05, + "timestamp": "2025-09-10 02:25:43.425397", + "step": 4693, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:43.456992", + "step": 4693, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00012090602103853598, + "timestamp": "2025-09-10 02:25:43.463920", + "step": 4694, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:25:43.512243", + "step": 4694, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0006930717499926686, + "timestamp": "2025-09-10 02:25:43.529330", + "step": 4695, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:43.566468", + "step": 4695, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0009244754328392446, + "timestamp": "2025-09-10 02:25:43.594568", + "step": 4696, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:43.626716", + "step": 4696, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.005284165497869253, + "timestamp": "2025-09-10 02:25:43.631652", + "step": 4697, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:43.665532", + "step": 4697, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0008305375231429935, + "timestamp": "2025-09-10 02:25:43.678870", + "step": 4698, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 13289167064320 + }, + "timestamp": "2025-09-10 02:25:43.719171", + "step": 4698, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0010617909720167518, + "timestamp": "2025-09-10 02:25:43.735466", + "step": 4699, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:43.768609", + "step": 4699, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0003633351589087397, + "timestamp": "2025-09-10 02:25:43.797140", + "step": 4700, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:43.827485", + "step": 4700, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001146303373388946, + "timestamp": "2025-09-10 02:25:43.833032", + "step": 4701, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:43.864017", + "step": 4701, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001229366025654599, + "timestamp": "2025-09-10 02:25:43.871844", + "step": 4702, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:43.903234", + "step": 4702, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0005992205115035176, + "timestamp": "2025-09-10 02:25:43.910729", + "step": 4703, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:43.942758", + "step": 4703, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0020961128175258636, + "timestamp": "2025-09-10 02:25:43.971239", + "step": 4704, + "epoch": 2 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:25:54.264978", + "step": 4704, + "epoch": 2 + }, + { + "type": "pplx", + "content": 22252700.049582753, + "timestamp": "2025-09-10 02:25:54.267701", + "step": 4704, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:54.299493", + "step": 4704, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0002747270918916911, + "timestamp": "2025-09-10 02:25:54.306002", + "step": 4705, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:54.337028", + "step": 4705, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0001008029212243855, + "timestamp": "2025-09-10 02:25:54.341004", + "step": 4706, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:25:54.374810", + "step": 4706, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0037881359457969666, + "timestamp": "2025-09-10 02:25:54.388470", + "step": 4707, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:54.419023", + "step": 4707, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00021644457592628896, + "timestamp": "2025-09-10 02:25:54.447372", + "step": 4708, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 19459015502528 + }, + "timestamp": "2025-09-10 02:25:54.500651", + "step": 4708, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0004264476883690804, + "timestamp": "2025-09-10 02:25:54.524182", + "step": 4709, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:54.555932", + "step": 4709, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00112416862975806, + "timestamp": "2025-09-10 02:25:54.566184", + "step": 4710, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:54.602033", + "step": 4710, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.001086343778297305, + "timestamp": "2025-09-10 02:25:54.609220", + "step": 4711, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:54.641173", + "step": 4711, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.0024282841477543116, + "timestamp": "2025-09-10 02:25:54.666306", + "step": 4712, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 15662185694400 + }, + "timestamp": "2025-09-10 02:25:54.711375", + "step": 4712, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00020581232092808932, + "timestamp": "2025-09-10 02:25:54.730406", + "step": 4713, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 3, + 224 + ], + "flops": 4983601869792 + }, + "timestamp": "2025-09-10 02:25:54.764147", + "step": 4713, + "epoch": 2 + }, + { + "type": "loss", + "content": 0.00017851527081802487, + "timestamp": "2025-09-10 02:25:54.767859", + "step": 4714, + "epoch": 2 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:54.820878", + "step": 4714, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.199771062005311e-05, + "timestamp": "2025-09-10 02:25:54.825650", + "step": 4715, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:54.860167", + "step": 4715, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005822144448757172, + "timestamp": "2025-09-10 02:25:54.888916", + "step": 4716, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:54.925746", + "step": 4716, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002994556853082031, + "timestamp": "2025-09-10 02:25:54.930312", + "step": 4717, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:54.966869", + "step": 4717, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.113481504144147e-05, + "timestamp": "2025-09-10 02:25:54.973429", + "step": 4718, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:55.010593", + "step": 4718, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.410970672732219e-05, + "timestamp": "2025-09-10 02:25:55.017589", + "step": 4719, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:55.049280", + "step": 4719, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018373571801930666, + "timestamp": "2025-09-10 02:25:55.076029", + "step": 4720, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:55.108559", + "step": 4720, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005885870661586523, + "timestamp": "2025-09-10 02:25:55.112996", + "step": 4721, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:55.143782", + "step": 4721, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000682682148180902, + "timestamp": "2025-09-10 02:25:55.151398", + "step": 4722, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:55.187563", + "step": 4722, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009401330025866628, + "timestamp": "2025-09-10 02:25:55.197939", + "step": 4723, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:55.228686", + "step": 4723, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.497522114543244e-05, + "timestamp": "2025-09-10 02:25:55.257550", + "step": 4724, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:55.288450", + "step": 4724, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003811214992310852, + "timestamp": "2025-09-10 02:25:55.293082", + "step": 4725, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:55.322824", + "step": 4725, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003699273569509387, + "timestamp": "2025-09-10 02:25:55.329732", + "step": 4726, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:55.360246", + "step": 4726, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002697373856790364, + "timestamp": "2025-09-10 02:25:55.364754", + "step": 4727, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:55.398993", + "step": 4727, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001845106016844511, + "timestamp": "2025-09-10 02:25:55.433261", + "step": 4728, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:55.465486", + "step": 4728, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001434317382518202, + "timestamp": "2025-09-10 02:25:55.467733", + "step": 4729, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:55.499235", + "step": 4729, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.866209100233391e-05, + "timestamp": "2025-09-10 02:25:55.503347", + "step": 4730, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:55.534093", + "step": 4730, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001506084663560614, + "timestamp": "2025-09-10 02:25:55.546333", + "step": 4731, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:55.576315", + "step": 4731, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013919537886977196, + "timestamp": "2025-09-10 02:25:55.601714", + "step": 4732, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:55.642361", + "step": 4732, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031470126123167574, + "timestamp": "2025-09-10 02:25:55.649630", + "step": 4733, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:55.692765", + "step": 4733, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006087294896133244, + "timestamp": "2025-09-10 02:25:55.698471", + "step": 4734, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:55.731178", + "step": 4734, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029154361691325903, + "timestamp": "2025-09-10 02:25:55.741490", + "step": 4735, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:55.773923", + "step": 4735, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005959026166237891, + "timestamp": "2025-09-10 02:25:55.799731", + "step": 4736, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:55.832859", + "step": 4736, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007912717992439866, + "timestamp": "2025-09-10 02:25:55.845448", + "step": 4737, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:55.878382", + "step": 4737, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.05290910648182e-05, + "timestamp": "2025-09-10 02:25:55.885600", + "step": 4738, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:55.916824", + "step": 4738, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016422310145571828, + "timestamp": "2025-09-10 02:25:55.923786", + "step": 4739, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:55.957047", + "step": 4739, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033386718132533133, + "timestamp": "2025-09-10 02:25:55.984672", + "step": 4740, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:56.018814", + "step": 4740, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002576867409516126, + "timestamp": "2025-09-10 02:25:56.023115", + "step": 4741, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:56.055754", + "step": 4741, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020760188344866037, + "timestamp": "2025-09-10 02:25:56.058458", + "step": 4742, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:56.090856", + "step": 4742, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004879082087427378, + "timestamp": "2025-09-10 02:25:56.098101", + "step": 4743, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:25:56.132776", + "step": 4743, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.860662738792598e-05, + "timestamp": "2025-09-10 02:25:56.167244", + "step": 4744, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:56.200709", + "step": 4744, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001819897734094411, + "timestamp": "2025-09-10 02:25:56.204983", + "step": 4745, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:56.236803", + "step": 4745, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019539693312253803, + "timestamp": "2025-09-10 02:25:56.243514", + "step": 4746, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:56.286988", + "step": 4746, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0030586186330765486, + "timestamp": "2025-09-10 02:25:56.291110", + "step": 4747, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:56.322545", + "step": 4747, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042248849058523774, + "timestamp": "2025-09-10 02:25:56.351141", + "step": 4748, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:56.382291", + "step": 4748, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006272942409850657, + "timestamp": "2025-09-10 02:25:56.389844", + "step": 4749, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:56.421653", + "step": 4749, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040742545388638973, + "timestamp": "2025-09-10 02:25:56.425826", + "step": 4750, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:56.458601", + "step": 4750, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014489439490716904, + "timestamp": "2025-09-10 02:25:56.465546", + "step": 4751, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:25:56.499456", + "step": 4751, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000770228507462889, + "timestamp": "2025-09-10 02:25:56.523257", + "step": 4752, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:56.555302", + "step": 4752, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037505757063627243, + "timestamp": "2025-09-10 02:25:56.557426", + "step": 4753, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:56.587932", + "step": 4753, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023457292991224676, + "timestamp": "2025-09-10 02:25:56.595023", + "step": 4754, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:56.626289", + "step": 4754, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00568475853651762, + "timestamp": "2025-09-10 02:25:56.633673", + "step": 4755, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:56.665423", + "step": 4755, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008253042469732463, + "timestamp": "2025-09-10 02:25:56.690687", + "step": 4756, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:56.722401", + "step": 4756, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006686433334834874, + "timestamp": "2025-09-10 02:25:56.734993", + "step": 4757, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:56.765610", + "step": 4757, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004306059854570776, + "timestamp": "2025-09-10 02:25:56.772714", + "step": 4758, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:56.802645", + "step": 4758, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03919557109475136, + "timestamp": "2025-09-10 02:25:56.806926", + "step": 4759, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:56.837195", + "step": 4759, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008901845430955291, + "timestamp": "2025-09-10 02:25:56.865124", + "step": 4760, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:56.899149", + "step": 4760, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022610726591665298, + "timestamp": "2025-09-10 02:25:56.906750", + "step": 4761, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:56.940355", + "step": 4761, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.875943654449657e-05, + "timestamp": "2025-09-10 02:25:56.947266", + "step": 4762, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:56.978084", + "step": 4762, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004053361772093922, + "timestamp": "2025-09-10 02:25:56.989159", + "step": 4763, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:57.022371", + "step": 4763, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025856548454612494, + "timestamp": "2025-09-10 02:25:57.054174", + "step": 4764, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:57.085349", + "step": 4764, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020959861285518855, + "timestamp": "2025-09-10 02:25:57.087517", + "step": 4765, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 928 + ], + "flops": 27527278844800 + }, + "timestamp": "2025-09-10 02:25:57.164501", + "step": 4765, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000836056366097182, + "timestamp": "2025-09-10 02:25:57.196102", + "step": 4766, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:57.227009", + "step": 4766, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007483740919269621, + "timestamp": "2025-09-10 02:25:57.231213", + "step": 4767, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:57.261606", + "step": 4767, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014812864537816495, + "timestamp": "2025-09-10 02:25:57.292657", + "step": 4768, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:57.323573", + "step": 4768, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001649035548325628, + "timestamp": "2025-09-10 02:25:57.328618", + "step": 4769, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:25:57.367308", + "step": 4769, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.814439358189702e-05, + "timestamp": "2025-09-10 02:25:57.382928", + "step": 4770, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:25:57.413769", + "step": 4770, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018101210007444024, + "timestamp": "2025-09-10 02:25:57.415992", + "step": 4771, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:57.446752", + "step": 4771, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005368964048102498, + "timestamp": "2025-09-10 02:25:57.474370", + "step": 4772, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:57.504468", + "step": 4772, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005379213253036141, + "timestamp": "2025-09-10 02:25:57.512288", + "step": 4773, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:57.543048", + "step": 4773, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007029054104350507, + "timestamp": "2025-09-10 02:25:57.550992", + "step": 4774, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:57.583074", + "step": 4774, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.017190443351864815, + "timestamp": "2025-09-10 02:25:57.595255", + "step": 4775, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:57.627841", + "step": 4775, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006791255436837673, + "timestamp": "2025-09-10 02:25:57.655695", + "step": 4776, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:57.688970", + "step": 4776, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.802838520845398e-05, + "timestamp": "2025-09-10 02:25:57.694277", + "step": 4777, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:57.726991", + "step": 4777, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004995018825866282, + "timestamp": "2025-09-10 02:25:57.734016", + "step": 4778, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:25:57.774778", + "step": 4778, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.5099070373689756e-05, + "timestamp": "2025-09-10 02:25:57.790648", + "step": 4779, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:57.822302", + "step": 4779, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015079900913406163, + "timestamp": "2025-09-10 02:25:57.850184", + "step": 4780, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:57.880239", + "step": 4780, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010758963180705905, + "timestamp": "2025-09-10 02:25:57.882597", + "step": 4781, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:57.912637", + "step": 4781, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.022532150149345398, + "timestamp": "2025-09-10 02:25:57.917341", + "step": 4782, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:57.949411", + "step": 4782, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006792863365262747, + "timestamp": "2025-09-10 02:25:57.957123", + "step": 4783, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:25:57.996736", + "step": 4783, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010572600876912475, + "timestamp": "2025-09-10 02:25:58.033523", + "step": 4784, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:58.070175", + "step": 4784, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010956094047287479, + "timestamp": "2025-09-10 02:25:58.075183", + "step": 4785, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:25:58.106182", + "step": 4785, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000302365719107911, + "timestamp": "2025-09-10 02:25:58.118711", + "step": 4786, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:58.148933", + "step": 4786, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.022172143682837486, + "timestamp": "2025-09-10 02:25:58.156099", + "step": 4787, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:58.188968", + "step": 4787, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.383214637637138e-05, + "timestamp": "2025-09-10 02:25:58.217333", + "step": 4788, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:58.248652", + "step": 4788, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019239891553297639, + "timestamp": "2025-09-10 02:25:58.259436", + "step": 4789, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:25:58.301360", + "step": 4789, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011966370948357508, + "timestamp": "2025-09-10 02:25:58.316977", + "step": 4790, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:58.350131", + "step": 4790, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045444341958500445, + "timestamp": "2025-09-10 02:25:58.356818", + "step": 4791, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:25:58.391166", + "step": 4791, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012033795792376623, + "timestamp": "2025-09-10 02:25:58.415950", + "step": 4792, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:58.448459", + "step": 4792, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009351377957500517, + "timestamp": "2025-09-10 02:25:58.461022", + "step": 4793, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:25:58.498355", + "step": 4793, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011824551038444042, + "timestamp": "2025-09-10 02:25:58.513914", + "step": 4794, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:58.544714", + "step": 4794, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004735906142741442, + "timestamp": "2025-09-10 02:25:58.549049", + "step": 4795, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:58.580216", + "step": 4795, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010355941019952297, + "timestamp": "2025-09-10 02:25:58.613219", + "step": 4796, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:58.653939", + "step": 4796, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03133935481309891, + "timestamp": "2025-09-10 02:25:58.661346", + "step": 4797, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:58.695774", + "step": 4797, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.2887880883645266e-05, + "timestamp": "2025-09-10 02:25:58.700167", + "step": 4798, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:58.739637", + "step": 4798, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006416105316020548, + "timestamp": "2025-09-10 02:25:58.746381", + "step": 4799, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:25:58.783249", + "step": 4799, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007519474602304399, + "timestamp": "2025-09-10 02:25:58.817805", + "step": 4800, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:58.851422", + "step": 4800, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013969124993309379, + "timestamp": "2025-09-10 02:25:58.853678", + "step": 4801, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:58.885301", + "step": 4801, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015225332754198462, + "timestamp": "2025-09-10 02:25:58.892596", + "step": 4802, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 14712978242368 + }, + "timestamp": "2025-09-10 02:25:58.935774", + "step": 4802, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002063736552372575, + "timestamp": "2025-09-10 02:25:58.953398", + "step": 4803, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:58.994738", + "step": 4803, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044029252603650093, + "timestamp": "2025-09-10 02:25:59.022422", + "step": 4804, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:59.063004", + "step": 4804, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024363842385355383, + "timestamp": "2025-09-10 02:25:59.067300", + "step": 4805, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:25:59.104097", + "step": 4805, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.4000339180929586e-05, + "timestamp": "2025-09-10 02:25:59.117432", + "step": 4806, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:59.160602", + "step": 4806, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.027750907465815544, + "timestamp": "2025-09-10 02:25:59.165189", + "step": 4807, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:59.198218", + "step": 4807, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.746334449620917e-05, + "timestamp": "2025-09-10 02:25:59.226048", + "step": 4808, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:25:59.267682", + "step": 4808, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004865474067628384, + "timestamp": "2025-09-10 02:25:59.284691", + "step": 4809, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:59.318826", + "step": 4809, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006346892914734781, + "timestamp": "2025-09-10 02:25:59.325602", + "step": 4810, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:59.358416", + "step": 4810, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009411797509528697, + "timestamp": "2025-09-10 02:25:59.365575", + "step": 4811, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:59.398191", + "step": 4811, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.868631655583158e-05, + "timestamp": "2025-09-10 02:25:59.423313", + "step": 4812, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:25:59.454662", + "step": 4812, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019831575627904385, + "timestamp": "2025-09-10 02:25:59.457327", + "step": 4813, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:59.493006", + "step": 4813, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005292465444654226, + "timestamp": "2025-09-10 02:25:59.500034", + "step": 4814, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:25:59.531461", + "step": 4814, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013571852818131447, + "timestamp": "2025-09-10 02:25:59.534195", + "step": 4815, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:59.566265", + "step": 4815, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0036411576438695192, + "timestamp": "2025-09-10 02:25:59.594002", + "step": 4816, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:59.628397", + "step": 4816, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03516482934355736, + "timestamp": "2025-09-10 02:25:59.633370", + "step": 4817, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:25:59.669479", + "step": 4817, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.574579987907782e-05, + "timestamp": "2025-09-10 02:25:59.681420", + "step": 4818, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:25:59.714869", + "step": 4818, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.725396502180956e-05, + "timestamp": "2025-09-10 02:25:59.725620", + "step": 4819, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:25:59.759943", + "step": 4819, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006414660601876676, + "timestamp": "2025-09-10 02:25:59.787921", + "step": 4820, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:25:59.821105", + "step": 4820, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.026372602209448814, + "timestamp": "2025-09-10 02:25:59.825502", + "step": 4821, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:25:59.859168", + "step": 4821, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004305239766836166, + "timestamp": "2025-09-10 02:25:59.866853", + "step": 4822, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:25:59.899931", + "step": 4822, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001554366433992982, + "timestamp": "2025-09-10 02:25:59.909434", + "step": 4823, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:25:59.943662", + "step": 4823, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.843928112066351e-05, + "timestamp": "2025-09-10 02:25:59.971429", + "step": 4824, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:00.006017", + "step": 4824, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037253022310324013, + "timestamp": "2025-09-10 02:26:00.016360", + "step": 4825, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:00.047393", + "step": 4825, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015221700596157461, + "timestamp": "2025-09-10 02:26:00.054706", + "step": 4826, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:00.087117", + "step": 4826, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029021056834608316, + "timestamp": "2025-09-10 02:26:00.094410", + "step": 4827, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:00.128880", + "step": 4827, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008609069511294365, + "timestamp": "2025-09-10 02:26:00.156657", + "step": 4828, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:00.190329", + "step": 4828, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01999555341899395, + "timestamp": "2025-09-10 02:26:00.195215", + "step": 4829, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:00.226485", + "step": 4829, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010207198793068528, + "timestamp": "2025-09-10 02:26:00.233313", + "step": 4830, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:26:00.270031", + "step": 4830, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045846131979487836, + "timestamp": "2025-09-10 02:26:00.283364", + "step": 4831, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:00.316424", + "step": 4831, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019452109932899475, + "timestamp": "2025-09-10 02:26:00.344133", + "step": 4832, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:00.379059", + "step": 4832, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.028431635349988937, + "timestamp": "2025-09-10 02:26:00.389300", + "step": 4833, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:26:00.425448", + "step": 4833, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021825884468853474, + "timestamp": "2025-09-10 02:26:00.438782", + "step": 4834, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:00.472698", + "step": 4834, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035446875263005495, + "timestamp": "2025-09-10 02:26:00.484427", + "step": 4835, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:00.515445", + "step": 4835, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008904158137738705, + "timestamp": "2025-09-10 02:26:00.546371", + "step": 4836, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:00.577790", + "step": 4836, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.025139760226011276, + "timestamp": "2025-09-10 02:26:00.582495", + "step": 4837, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:00.613669", + "step": 4837, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02474926970899105, + "timestamp": "2025-09-10 02:26:00.624621", + "step": 4838, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:26:00.655331", + "step": 4838, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005889459978789091, + "timestamp": "2025-09-10 02:26:00.658335", + "step": 4839, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:00.690023", + "step": 4839, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004479756171349436, + "timestamp": "2025-09-10 02:26:00.721142", + "step": 4840, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:26:00.754422", + "step": 4840, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00183139240834862, + "timestamp": "2025-09-10 02:26:00.767786", + "step": 4841, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:26:00.800060", + "step": 4841, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007262559956870973, + "timestamp": "2025-09-10 02:26:00.802493", + "step": 4842, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:00.833909", + "step": 4842, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002282061759615317, + "timestamp": "2025-09-10 02:26:00.846515", + "step": 4843, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:26:00.876802", + "step": 4843, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018490191723685712, + "timestamp": "2025-09-10 02:26:00.900295", + "step": 4844, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:00.930704", + "step": 4844, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.037172622978687286, + "timestamp": "2025-09-10 02:26:00.935978", + "step": 4845, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:00.966702", + "step": 4845, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007538548088632524, + "timestamp": "2025-09-10 02:26:00.970742", + "step": 4846, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:01.001708", + "step": 4846, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003542929422110319, + "timestamp": "2025-09-10 02:26:01.009177", + "step": 4847, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:01.039473", + "step": 4847, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006386330351233482, + "timestamp": "2025-09-10 02:26:01.068042", + "step": 4848, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:01.098644", + "step": 4848, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007097636116668582, + "timestamp": "2025-09-10 02:26:01.108611", + "step": 4849, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:01.139356", + "step": 4849, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003923158801626414, + "timestamp": "2025-09-10 02:26:01.146692", + "step": 4850, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:01.176912", + "step": 4850, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001541634788736701, + "timestamp": "2025-09-10 02:26:01.189033", + "step": 4851, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:26:11.251927", + "step": 4851, + "epoch": 3 + }, + { + "type": "pplx", + "content": 22527274.187912628, + "timestamp": "2025-09-10 02:26:11.255094", + "step": 4851, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:11.285937", + "step": 4851, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015954956179484725, + "timestamp": "2025-09-10 02:26:11.312678", + "step": 4852, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:11.346481", + "step": 4852, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044376106234267354, + "timestamp": "2025-09-10 02:26:11.351218", + "step": 4853, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:11.383233", + "step": 4853, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005625728517770767, + "timestamp": "2025-09-10 02:26:11.390990", + "step": 4854, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:26:11.428386", + "step": 4854, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009019298478960991, + "timestamp": "2025-09-10 02:26:11.442133", + "step": 4855, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:11.473458", + "step": 4855, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00413868110626936, + "timestamp": "2025-09-10 02:26:11.501704", + "step": 4856, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:11.532532", + "step": 4856, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003734805155545473, + "timestamp": "2025-09-10 02:26:11.537572", + "step": 4857, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:11.568519", + "step": 4857, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028699575341306627, + "timestamp": "2025-09-10 02:26:11.576305", + "step": 4858, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:11.607157", + "step": 4858, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013391379616223276, + "timestamp": "2025-09-10 02:26:11.617438", + "step": 4859, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:11.649472", + "step": 4859, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008628031238913536, + "timestamp": "2025-09-10 02:26:11.677213", + "step": 4860, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:11.711931", + "step": 4860, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020894031040370464, + "timestamp": "2025-09-10 02:26:11.719440", + "step": 4861, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:11.752557", + "step": 4861, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005519864149391651, + "timestamp": "2025-09-10 02:26:11.760336", + "step": 4862, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:11.792765", + "step": 4862, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005144777707755566, + "timestamp": "2025-09-10 02:26:11.799769", + "step": 4863, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:11.830246", + "step": 4863, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043783331057056785, + "timestamp": "2025-09-10 02:26:11.857871", + "step": 4864, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:26:11.890550", + "step": 4864, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005862680729478598, + "timestamp": "2025-09-10 02:26:11.903662", + "step": 4865, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:11.934633", + "step": 4865, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007736521307379007, + "timestamp": "2025-09-10 02:26:11.945036", + "step": 4866, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:26:11.975688", + "step": 4866, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004861840512603521, + "timestamp": "2025-09-10 02:26:11.977952", + "step": 4867, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:12.008274", + "step": 4867, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005645510274916887, + "timestamp": "2025-09-10 02:26:12.036047", + "step": 4868, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:12.067329", + "step": 4868, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002813952276483178, + "timestamp": "2025-09-10 02:26:12.077850", + "step": 4869, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:12.108352", + "step": 4869, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005273285787552595, + "timestamp": "2025-09-10 02:26:12.112325", + "step": 4870, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:26:12.143472", + "step": 4870, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008787405677139759, + "timestamp": "2025-09-10 02:26:12.145799", + "step": 4871, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:12.178570", + "step": 4871, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001541761914268136, + "timestamp": "2025-09-10 02:26:12.210555", + "step": 4872, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:12.245477", + "step": 4872, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011291869916021824, + "timestamp": "2025-09-10 02:26:12.253776", + "step": 4873, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:12.286054", + "step": 4873, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001998367952182889, + "timestamp": "2025-09-10 02:26:12.293055", + "step": 4874, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:12.329385", + "step": 4874, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005770617164671421, + "timestamp": "2025-09-10 02:26:12.333776", + "step": 4875, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:12.365533", + "step": 4875, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012409423943609, + "timestamp": "2025-09-10 02:26:12.390766", + "step": 4876, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:12.424494", + "step": 4876, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001259890734218061, + "timestamp": "2025-09-10 02:26:12.428830", + "step": 4877, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:12.459636", + "step": 4877, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013752224622294307, + "timestamp": "2025-09-10 02:26:12.466402", + "step": 4878, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:12.498281", + "step": 4878, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00954064168035984, + "timestamp": "2025-09-10 02:26:12.504671", + "step": 4879, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:12.538044", + "step": 4879, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020668611396104097, + "timestamp": "2025-09-10 02:26:12.568741", + "step": 4880, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:12.602992", + "step": 4880, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032076804200187325, + "timestamp": "2025-09-10 02:26:12.612119", + "step": 4881, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:26:12.649546", + "step": 4881, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008431184687651694, + "timestamp": "2025-09-10 02:26:12.663261", + "step": 4882, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:26:12.705884", + "step": 4882, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004074872005730867, + "timestamp": "2025-09-10 02:26:12.719281", + "step": 4883, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:12.752046", + "step": 4883, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002831391990184784, + "timestamp": "2025-09-10 02:26:12.776773", + "step": 4884, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:12.808218", + "step": 4884, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004910766612738371, + "timestamp": "2025-09-10 02:26:12.810889", + "step": 4885, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:12.841938", + "step": 4885, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009595355950295925, + "timestamp": "2025-09-10 02:26:12.854581", + "step": 4886, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:26:12.888932", + "step": 4886, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013698582537472248, + "timestamp": "2025-09-10 02:26:12.902772", + "step": 4887, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:12.933454", + "step": 4887, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003295644710306078, + "timestamp": "2025-09-10 02:26:12.958026", + "step": 4888, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:12.988869", + "step": 4888, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.0092607125407085e-05, + "timestamp": "2025-09-10 02:26:12.993445", + "step": 4889, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:13.026601", + "step": 4889, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013606924330815673, + "timestamp": "2025-09-10 02:26:13.039119", + "step": 4890, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:13.070766", + "step": 4890, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0027366813737899065, + "timestamp": "2025-09-10 02:26:13.077822", + "step": 4891, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 18035204324480 + }, + "timestamp": "2025-09-10 02:26:13.131591", + "step": 4891, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004872309975326061, + "timestamp": "2025-09-10 02:26:13.173871", + "step": 4892, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:13.208531", + "step": 4892, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004739653959404677, + "timestamp": "2025-09-10 02:26:13.217311", + "step": 4893, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:13.249819", + "step": 4893, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016823039622977376, + "timestamp": "2025-09-10 02:26:13.254188", + "step": 4894, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:13.285267", + "step": 4894, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004189238417893648, + "timestamp": "2025-09-10 02:26:13.289701", + "step": 4895, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:26:13.334340", + "step": 4895, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009055934497155249, + "timestamp": "2025-09-10 02:26:13.369012", + "step": 4896, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:26:13.403183", + "step": 4896, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0026929269079118967, + "timestamp": "2025-09-10 02:26:13.416504", + "step": 4897, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:13.447231", + "step": 4897, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013669952750205994, + "timestamp": "2025-09-10 02:26:13.451788", + "step": 4898, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:13.483646", + "step": 4898, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022419628221541643, + "timestamp": "2025-09-10 02:26:13.490450", + "step": 4899, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:13.523859", + "step": 4899, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009813571814447641, + "timestamp": "2025-09-10 02:26:13.551769", + "step": 4900, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:13.583180", + "step": 4900, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005111963837407529, + "timestamp": "2025-09-10 02:26:13.586698", + "step": 4901, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:13.617822", + "step": 4901, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018179837614297867, + "timestamp": "2025-09-10 02:26:13.628203", + "step": 4902, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:13.658289", + "step": 4902, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024603954516351223, + "timestamp": "2025-09-10 02:26:13.662869", + "step": 4903, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:13.694489", + "step": 4903, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.283476588781923e-05, + "timestamp": "2025-09-10 02:26:13.723154", + "step": 4904, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:13.753868", + "step": 4904, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006378447869792581, + "timestamp": "2025-09-10 02:26:13.758616", + "step": 4905, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:26:13.793601", + "step": 4905, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005772311706095934, + "timestamp": "2025-09-10 02:26:13.807551", + "step": 4906, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:13.841772", + "step": 4906, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017352063150610775, + "timestamp": "2025-09-10 02:26:13.849465", + "step": 4907, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:13.880186", + "step": 4907, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007246700115501881, + "timestamp": "2025-09-10 02:26:13.905590", + "step": 4908, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:13.937751", + "step": 4908, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003136695013381541, + "timestamp": "2025-09-10 02:26:13.942598", + "step": 4909, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:13.974113", + "step": 4909, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005124423187226057, + "timestamp": "2025-09-10 02:26:13.982009", + "step": 4910, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:14.013185", + "step": 4910, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003622827643994242, + "timestamp": "2025-09-10 02:26:14.023544", + "step": 4911, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:14.054555", + "step": 4911, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01249981764703989, + "timestamp": "2025-09-10 02:26:14.082638", + "step": 4912, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:14.114302", + "step": 4912, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0059426832012832165, + "timestamp": "2025-09-10 02:26:14.119050", + "step": 4913, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:14.150381", + "step": 4913, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011479367094580084, + "timestamp": "2025-09-10 02:26:14.161544", + "step": 4914, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:14.191668", + "step": 4914, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021279591601341963, + "timestamp": "2025-09-10 02:26:14.198639", + "step": 4915, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:14.229407", + "step": 4915, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.092241215403192e-05, + "timestamp": "2025-09-10 02:26:14.257786", + "step": 4916, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:14.288854", + "step": 4916, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016833031259011477, + "timestamp": "2025-09-10 02:26:14.293904", + "step": 4917, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:14.327484", + "step": 4917, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000857060425914824, + "timestamp": "2025-09-10 02:26:14.335234", + "step": 4918, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:14.366296", + "step": 4918, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.280987195670605e-05, + "timestamp": "2025-09-10 02:26:14.373922", + "step": 4919, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:14.404028", + "step": 4919, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001588566112332046, + "timestamp": "2025-09-10 02:26:14.432402", + "step": 4920, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:14.462073", + "step": 4920, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03219066932797432, + "timestamp": "2025-09-10 02:26:14.466695", + "step": 4921, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:14.497215", + "step": 4921, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037080320180393755, + "timestamp": "2025-09-10 02:26:14.509609", + "step": 4922, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:14.539847", + "step": 4922, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011398802598705515, + "timestamp": "2025-09-10 02:26:14.547735", + "step": 4923, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:14.578237", + "step": 4923, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020103438873775303, + "timestamp": "2025-09-10 02:26:14.603447", + "step": 4924, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:14.635658", + "step": 4924, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020608811173588037, + "timestamp": "2025-09-10 02:26:14.640856", + "step": 4925, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:14.671556", + "step": 4925, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.329066673060879e-05, + "timestamp": "2025-09-10 02:26:14.675957", + "step": 4926, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:14.706195", + "step": 4926, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008835741318762302, + "timestamp": "2025-09-10 02:26:14.710912", + "step": 4927, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:14.741722", + "step": 4927, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034493012935854495, + "timestamp": "2025-09-10 02:26:14.766534", + "step": 4928, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:14.796398", + "step": 4928, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003803166327998042, + "timestamp": "2025-09-10 02:26:14.801450", + "step": 4929, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:14.832599", + "step": 4929, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011779103806475177, + "timestamp": "2025-09-10 02:26:14.839344", + "step": 4930, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:14.869905", + "step": 4930, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015055211260914803, + "timestamp": "2025-09-10 02:26:14.874259", + "step": 4931, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:26:14.907594", + "step": 4931, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029439402278512716, + "timestamp": "2025-09-10 02:26:14.941862", + "step": 4932, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:14.973346", + "step": 4932, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018022792937699705, + "timestamp": "2025-09-10 02:26:14.977958", + "step": 4933, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:15.009235", + "step": 4933, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008141091093420982, + "timestamp": "2025-09-10 02:26:15.021774", + "step": 4934, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:26:15.055739", + "step": 4934, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017168700287584215, + "timestamp": "2025-09-10 02:26:15.069567", + "step": 4935, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:15.099959", + "step": 4935, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.493528341408819e-05, + "timestamp": "2025-09-10 02:26:15.128009", + "step": 4936, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:15.159409", + "step": 4936, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017535120423417538, + "timestamp": "2025-09-10 02:26:15.164334", + "step": 4937, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:15.195047", + "step": 4937, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033908261684700847, + "timestamp": "2025-09-10 02:26:15.202033", + "step": 4938, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 15662185694400 + }, + "timestamp": "2025-09-10 02:26:15.248703", + "step": 4938, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015323214756790549, + "timestamp": "2025-09-10 02:26:15.267902", + "step": 4939, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:15.301186", + "step": 4939, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017902174731716514, + "timestamp": "2025-09-10 02:26:15.333311", + "step": 4940, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:15.363730", + "step": 4940, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014680066669825464, + "timestamp": "2025-09-10 02:26:15.372333", + "step": 4941, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:15.402692", + "step": 4941, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020298264280427247, + "timestamp": "2025-09-10 02:26:15.407122", + "step": 4942, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:26:15.440189", + "step": 4942, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014298847236204892, + "timestamp": "2025-09-10 02:26:15.453645", + "step": 4943, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:15.485754", + "step": 4943, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034119986230507493, + "timestamp": "2025-09-10 02:26:15.519215", + "step": 4944, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:26:15.554520", + "step": 4944, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043467155774123967, + "timestamp": "2025-09-10 02:26:15.569606", + "step": 4945, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:15.600181", + "step": 4945, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013701003626920283, + "timestamp": "2025-09-10 02:26:15.610359", + "step": 4946, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:15.640935", + "step": 4946, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013453126884996891, + "timestamp": "2025-09-10 02:26:15.653151", + "step": 4947, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:15.683338", + "step": 4947, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.32469925424084e-05, + "timestamp": "2025-09-10 02:26:15.716450", + "step": 4948, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:15.746478", + "step": 4948, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.06172531098127365, + "timestamp": "2025-09-10 02:26:15.751195", + "step": 4949, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:15.783122", + "step": 4949, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008722272468730807, + "timestamp": "2025-09-10 02:26:15.790214", + "step": 4950, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:15.820155", + "step": 4950, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04367053508758545, + "timestamp": "2025-09-10 02:26:15.827844", + "step": 4951, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:15.857422", + "step": 4951, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004010576813016087, + "timestamp": "2025-09-10 02:26:15.885360", + "step": 4952, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:15.915490", + "step": 4952, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.4004107874352485e-05, + "timestamp": "2025-09-10 02:26:15.920410", + "step": 4953, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:26:15.957792", + "step": 4953, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007229152601212263, + "timestamp": "2025-09-10 02:26:15.973402", + "step": 4954, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:16.004631", + "step": 4954, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.849377703270875e-05, + "timestamp": "2025-09-10 02:26:16.012289", + "step": 4955, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:16.042679", + "step": 4955, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004347801208496094, + "timestamp": "2025-09-10 02:26:16.070650", + "step": 4956, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:16.101178", + "step": 4956, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001966599520528689, + "timestamp": "2025-09-10 02:26:16.104169", + "step": 4957, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:26:16.135122", + "step": 4957, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021397892851382494, + "timestamp": "2025-09-10 02:26:16.137429", + "step": 4958, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:16.168886", + "step": 4958, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024242003564722836, + "timestamp": "2025-09-10 02:26:16.176400", + "step": 4959, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:16.206448", + "step": 4959, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008051989716477692, + "timestamp": "2025-09-10 02:26:16.234381", + "step": 4960, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:16.265419", + "step": 4960, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025039296597242355, + "timestamp": "2025-09-10 02:26:16.270792", + "step": 4961, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:26:16.308395", + "step": 4961, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.02980105415918e-05, + "timestamp": "2025-09-10 02:26:16.324261", + "step": 4962, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:16.358251", + "step": 4962, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003547137137502432, + "timestamp": "2025-09-10 02:26:16.365900", + "step": 4963, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:26:16.399984", + "step": 4963, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.306949894176796e-05, + "timestamp": "2025-09-10 02:26:16.434207", + "step": 4964, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:16.467886", + "step": 4964, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013334887626115233, + "timestamp": "2025-09-10 02:26:16.473130", + "step": 4965, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:16.504496", + "step": 4965, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.8079971495317295e-05, + "timestamp": "2025-09-10 02:26:16.516848", + "step": 4966, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:16.547233", + "step": 4966, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.8230766222113743e-05, + "timestamp": "2025-09-10 02:26:16.551728", + "step": 4967, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:16.582260", + "step": 4967, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009217527112923563, + "timestamp": "2025-09-10 02:26:16.610964", + "step": 4968, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:16.641445", + "step": 4968, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.093651169678196e-05, + "timestamp": "2025-09-10 02:26:16.646147", + "step": 4969, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:16.677638", + "step": 4969, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006383144063875079, + "timestamp": "2025-09-10 02:26:16.685338", + "step": 4970, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:16.716465", + "step": 4970, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012200616765767336, + "timestamp": "2025-09-10 02:26:16.724215", + "step": 4971, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:16.756583", + "step": 4971, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018401713168714195, + "timestamp": "2025-09-10 02:26:16.784994", + "step": 4972, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:16.815604", + "step": 4972, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.1525421945843846e-05, + "timestamp": "2025-09-10 02:26:16.817894", + "step": 4973, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:16.849803", + "step": 4973, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023603920999448746, + "timestamp": "2025-09-10 02:26:16.857514", + "step": 4974, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:26:16.891206", + "step": 4974, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003533114795573056, + "timestamp": "2025-09-10 02:26:16.904618", + "step": 4975, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:16.936270", + "step": 4975, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.07041340321302414, + "timestamp": "2025-09-10 02:26:16.964307", + "step": 4976, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:26:16.995447", + "step": 4976, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019405941711738706, + "timestamp": "2025-09-10 02:26:17.000357", + "step": 4977, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:17.034936", + "step": 4977, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004297175328247249, + "timestamp": "2025-09-10 02:26:17.045363", + "step": 4978, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:17.077867", + "step": 4978, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030306234839372337, + "timestamp": "2025-09-10 02:26:17.085791", + "step": 4979, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:26:17.119765", + "step": 4979, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000214372223126702, + "timestamp": "2025-09-10 02:26:17.154008", + "step": 4980, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:17.185421", + "step": 4980, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0060597313567996025, + "timestamp": "2025-09-10 02:26:17.190972", + "step": 4981, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:26:17.224555", + "step": 4981, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004712548106908798, + "timestamp": "2025-09-10 02:26:17.226680", + "step": 4982, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:17.259673", + "step": 4982, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018239330500364304, + "timestamp": "2025-09-10 02:26:17.266706", + "step": 4983, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:17.298378", + "step": 4983, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.246271757641807e-05, + "timestamp": "2025-09-10 02:26:17.331775", + "step": 4984, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:17.362536", + "step": 4984, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000588534923736006, + "timestamp": "2025-09-10 02:26:17.367121", + "step": 4985, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:17.397655", + "step": 4985, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04317157343029976, + "timestamp": "2025-09-10 02:26:17.405334", + "step": 4986, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:17.436371", + "step": 4986, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006422134465537965, + "timestamp": "2025-09-10 02:26:17.443245", + "step": 4987, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:17.474316", + "step": 4987, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034632027382031083, + "timestamp": "2025-09-10 02:26:17.502573", + "step": 4988, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:17.535084", + "step": 4988, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.68417409947142e-05, + "timestamp": "2025-09-10 02:26:17.539828", + "step": 4989, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:17.572346", + "step": 4989, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013398627925198525, + "timestamp": "2025-09-10 02:26:17.576500", + "step": 4990, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:26:17.612524", + "step": 4990, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029133365023881197, + "timestamp": "2025-09-10 02:26:17.626175", + "step": 4991, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:17.658240", + "step": 4991, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.629496005596593e-05, + "timestamp": "2025-09-10 02:26:17.686141", + "step": 4992, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:17.718276", + "step": 4992, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002693708229344338, + "timestamp": "2025-09-10 02:26:17.723486", + "step": 4993, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:17.755369", + "step": 4993, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012545159552246332, + "timestamp": "2025-09-10 02:26:17.763054", + "step": 4994, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:17.795274", + "step": 4994, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005542716244235635, + "timestamp": "2025-09-10 02:26:17.807429", + "step": 4995, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:26:17.838080", + "step": 4995, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003210590220987797, + "timestamp": "2025-09-10 02:26:17.861907", + "step": 4996, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:17.893230", + "step": 4996, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.695386011619121e-05, + "timestamp": "2025-09-10 02:26:17.898024", + "step": 4997, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:17.928678", + "step": 4997, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001586790895089507, + "timestamp": "2025-09-10 02:26:17.936042", + "step": 4998, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:26:28.250382", + "step": 4998, + "epoch": 3 + }, + { + "type": "pplx", + "content": 20015215.356057025, + "timestamp": "2025-09-10 02:26:28.261232", + "step": 4998, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:26:28.310477", + "step": 4998, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005191663512960076, + "timestamp": "2025-09-10 02:26:28.323762", + "step": 4999, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:28.385226", + "step": 4999, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004746699705719948, + "timestamp": "2025-09-10 02:26:28.412768", + "step": 5000, + "epoch": 3 + }, + { + "type": "info", + "content": "Checkpoint saved at step 5000", + "timestamp": "2025-09-10 02:26:33.179348", + "step": 5000, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:33.211464", + "step": 5000, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004966092528775334, + "timestamp": "2025-09-10 02:26:33.215172", + "step": 5001, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:33.247140", + "step": 5001, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005471754702739418, + "timestamp": "2025-09-10 02:26:33.253624", + "step": 5002, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:33.286134", + "step": 5002, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014292819832917303, + "timestamp": "2025-09-10 02:26:33.295863", + "step": 5003, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:33.327101", + "step": 5003, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004297696577850729, + "timestamp": "2025-09-10 02:26:33.354748", + "step": 5004, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:33.386788", + "step": 5004, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.166915358742699e-05, + "timestamp": "2025-09-10 02:26:33.396413", + "step": 5005, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:33.427713", + "step": 5005, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015276219346560538, + "timestamp": "2025-09-10 02:26:33.438578", + "step": 5006, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:33.472065", + "step": 5006, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017286234069615602, + "timestamp": "2025-09-10 02:26:33.482351", + "step": 5007, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:33.512627", + "step": 5007, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016119235078804195, + "timestamp": "2025-09-10 02:26:33.540535", + "step": 5008, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:26:33.573284", + "step": 5008, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011435570195317268, + "timestamp": "2025-09-10 02:26:33.586284", + "step": 5009, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:26:33.621203", + "step": 5009, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010838081361725926, + "timestamp": "2025-09-10 02:26:33.635195", + "step": 5010, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:33.665549", + "step": 5010, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003136697050649673, + "timestamp": "2025-09-10 02:26:33.672471", + "step": 5011, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:33.703720", + "step": 5011, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.7531153288437054e-05, + "timestamp": "2025-09-10 02:26:33.728906", + "step": 5012, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:26:33.765485", + "step": 5012, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001678946428000927, + "timestamp": "2025-09-10 02:26:33.780640", + "step": 5013, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:26:33.812596", + "step": 5013, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004162538971286267, + "timestamp": "2025-09-10 02:26:33.815078", + "step": 5014, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:33.847600", + "step": 5014, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012148728128522635, + "timestamp": "2025-09-10 02:26:33.852067", + "step": 5015, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:33.883868", + "step": 5015, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019105346873402596, + "timestamp": "2025-09-10 02:26:33.915046", + "step": 5016, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:33.946080", + "step": 5016, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001212744740769267, + "timestamp": "2025-09-10 02:26:33.953972", + "step": 5017, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:33.986413", + "step": 5017, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005570516805164516, + "timestamp": "2025-09-10 02:26:33.993998", + "step": 5018, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:34.024829", + "step": 5018, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004198791459202766, + "timestamp": "2025-09-10 02:26:34.035098", + "step": 5019, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:34.066423", + "step": 5019, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003597374598030001, + "timestamp": "2025-09-10 02:26:34.098448", + "step": 5020, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:34.131175", + "step": 5020, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01109243929386139, + "timestamp": "2025-09-10 02:26:34.141461", + "step": 5021, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:34.172970", + "step": 5021, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014968998730182648, + "timestamp": "2025-09-10 02:26:34.180732", + "step": 5022, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:34.212729", + "step": 5022, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012139989848947152, + "timestamp": "2025-09-10 02:26:34.219631", + "step": 5023, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:34.251172", + "step": 5023, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002333705051569268, + "timestamp": "2025-09-10 02:26:34.277453", + "step": 5024, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:34.308015", + "step": 5024, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001543233374832198, + "timestamp": "2025-09-10 02:26:34.312696", + "step": 5025, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:26:34.347950", + "step": 5025, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005688256933353841, + "timestamp": "2025-09-10 02:26:34.361964", + "step": 5026, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:26:34.397329", + "step": 5026, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042403684346936643, + "timestamp": "2025-09-10 02:26:34.410955", + "step": 5027, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:34.444474", + "step": 5027, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002217363507952541, + "timestamp": "2025-09-10 02:26:34.477391", + "step": 5028, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:34.508716", + "step": 5028, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006893317913636565, + "timestamp": "2025-09-10 02:26:34.511043", + "step": 5029, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:34.541016", + "step": 5029, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008850133046507835, + "timestamp": "2025-09-10 02:26:34.548002", + "step": 5030, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:34.582927", + "step": 5030, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032538484083488584, + "timestamp": "2025-09-10 02:26:34.590672", + "step": 5031, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:34.626976", + "step": 5031, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00039912323700264096, + "timestamp": "2025-09-10 02:26:34.658831", + "step": 5032, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:34.695229", + "step": 5032, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007538055535405874, + "timestamp": "2025-09-10 02:26:34.699593", + "step": 5033, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:34.730572", + "step": 5033, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.657992344116792e-05, + "timestamp": "2025-09-10 02:26:34.740878", + "step": 5034, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:26:34.779662", + "step": 5034, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010134560288861394, + "timestamp": "2025-09-10 02:26:34.795315", + "step": 5035, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:34.827775", + "step": 5035, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024202538188546896, + "timestamp": "2025-09-10 02:26:34.856235", + "step": 5036, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:34.886920", + "step": 5036, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004926332621835172, + "timestamp": "2025-09-10 02:26:34.897060", + "step": 5037, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:34.928166", + "step": 5037, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006999452598392963, + "timestamp": "2025-09-10 02:26:34.935741", + "step": 5038, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:34.966631", + "step": 5038, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005028885323554277, + "timestamp": "2025-09-10 02:26:34.974266", + "step": 5039, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:35.008987", + "step": 5039, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027442388818599284, + "timestamp": "2025-09-10 02:26:35.034466", + "step": 5040, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:35.066296", + "step": 5040, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005980245769023895, + "timestamp": "2025-09-10 02:26:35.068853", + "step": 5041, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:35.099419", + "step": 5041, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015993161650840193, + "timestamp": "2025-09-10 02:26:35.109597", + "step": 5042, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:35.142793", + "step": 5042, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005007135332562029, + "timestamp": "2025-09-10 02:26:35.147451", + "step": 5043, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:35.180162", + "step": 5043, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006699333898723125, + "timestamp": "2025-09-10 02:26:35.207921", + "step": 5044, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:35.240430", + "step": 5044, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028629746520891786, + "timestamp": "2025-09-10 02:26:35.245081", + "step": 5045, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:35.277554", + "step": 5045, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006818973342888057, + "timestamp": "2025-09-10 02:26:35.287372", + "step": 5046, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:35.318835", + "step": 5046, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002404263708740473, + "timestamp": "2025-09-10 02:26:35.326633", + "step": 5047, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:35.359340", + "step": 5047, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02151825651526451, + "timestamp": "2025-09-10 02:26:35.387052", + "step": 5048, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:35.417860", + "step": 5048, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003552958951331675, + "timestamp": "2025-09-10 02:26:35.422447", + "step": 5049, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:35.452240", + "step": 5049, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025253373314626515, + "timestamp": "2025-09-10 02:26:35.459190", + "step": 5050, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:35.489346", + "step": 5050, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040127182728610933, + "timestamp": "2025-09-10 02:26:35.493878", + "step": 5051, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:35.524519", + "step": 5051, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006088269292376935, + "timestamp": "2025-09-10 02:26:35.549424", + "step": 5052, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:35.582289", + "step": 5052, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042608132935129106, + "timestamp": "2025-09-10 02:26:35.585338", + "step": 5053, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:35.617714", + "step": 5053, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003567171806935221, + "timestamp": "2025-09-10 02:26:35.621800", + "step": 5054, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:35.653481", + "step": 5054, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005917255766689777, + "timestamp": "2025-09-10 02:26:35.663678", + "step": 5055, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:26:35.694835", + "step": 5055, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010045560338767245, + "timestamp": "2025-09-10 02:26:35.718582", + "step": 5056, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:35.750639", + "step": 5056, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001807038497645408, + "timestamp": "2025-09-10 02:26:35.755218", + "step": 5057, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:35.790888", + "step": 5057, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001338689005933702, + "timestamp": "2025-09-10 02:26:35.798429", + "step": 5058, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:35.833677", + "step": 5058, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007186224684119225, + "timestamp": "2025-09-10 02:26:35.845938", + "step": 5059, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:35.876959", + "step": 5059, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001849995693191886, + "timestamp": "2025-09-10 02:26:35.902008", + "step": 5060, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:35.936006", + "step": 5060, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040959817124530673, + "timestamp": "2025-09-10 02:26:35.944168", + "step": 5061, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:35.978195", + "step": 5061, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007337800343520939, + "timestamp": "2025-09-10 02:26:35.985672", + "step": 5062, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:36.017177", + "step": 5062, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004766891244798899, + "timestamp": "2025-09-10 02:26:36.024108", + "step": 5063, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:36.057466", + "step": 5063, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009358166716992855, + "timestamp": "2025-09-10 02:26:36.085206", + "step": 5064, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:36.119710", + "step": 5064, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004207981692161411, + "timestamp": "2025-09-10 02:26:36.128101", + "step": 5065, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:36.160614", + "step": 5065, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005621476448141038, + "timestamp": "2025-09-10 02:26:36.167553", + "step": 5066, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:36.198941", + "step": 5066, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005337732727639377, + "timestamp": "2025-09-10 02:26:36.205778", + "step": 5067, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:36.239845", + "step": 5067, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006483304314315319, + "timestamp": "2025-09-10 02:26:36.268263", + "step": 5068, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:36.299494", + "step": 5068, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004982685786671937, + "timestamp": "2025-09-10 02:26:36.308801", + "step": 5069, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:26:36.348134", + "step": 5069, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000861111271660775, + "timestamp": "2025-09-10 02:26:36.361512", + "step": 5070, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:36.393080", + "step": 5070, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017864606343209743, + "timestamp": "2025-09-10 02:26:36.403739", + "step": 5071, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 19459015502528 + }, + "timestamp": "2025-09-10 02:26:36.460224", + "step": 5071, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005434316699393094, + "timestamp": "2025-09-10 02:26:36.504467", + "step": 5072, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:36.537697", + "step": 5072, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014476195792667568, + "timestamp": "2025-09-10 02:26:36.542072", + "step": 5073, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:36.576057", + "step": 5073, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019232665363233536, + "timestamp": "2025-09-10 02:26:36.582602", + "step": 5074, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:36.616440", + "step": 5074, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004715229442808777, + "timestamp": "2025-09-10 02:26:36.624236", + "step": 5075, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:36.657634", + "step": 5075, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015088057261891663, + "timestamp": "2025-09-10 02:26:36.685386", + "step": 5076, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:36.718883", + "step": 5076, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018138757441192865, + "timestamp": "2025-09-10 02:26:36.723325", + "step": 5077, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:36.756301", + "step": 5077, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036036456003785133, + "timestamp": "2025-09-10 02:26:36.768752", + "step": 5078, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:36.802805", + "step": 5078, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014213754911907017, + "timestamp": "2025-09-10 02:26:36.809991", + "step": 5079, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:36.842491", + "step": 5079, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002783481031656265, + "timestamp": "2025-09-10 02:26:36.870966", + "step": 5080, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:36.905049", + "step": 5080, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010689280461519957, + "timestamp": "2025-09-10 02:26:36.909941", + "step": 5081, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:36.943472", + "step": 5081, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004427754320204258, + "timestamp": "2025-09-10 02:26:36.950393", + "step": 5082, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:36.981893", + "step": 5082, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009393549407832325, + "timestamp": "2025-09-10 02:26:36.988419", + "step": 5083, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:37.020810", + "step": 5083, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.021152915433049202, + "timestamp": "2025-09-10 02:26:37.048368", + "step": 5084, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:37.080873", + "step": 5084, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002822284121066332, + "timestamp": "2025-09-10 02:26:37.083125", + "step": 5085, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:37.116842", + "step": 5085, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.049205031245946884, + "timestamp": "2025-09-10 02:26:37.127741", + "step": 5086, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:37.160653", + "step": 5086, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024678107001818717, + "timestamp": "2025-09-10 02:26:37.167406", + "step": 5087, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:37.203699", + "step": 5087, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005720262881368399, + "timestamp": "2025-09-10 02:26:37.231850", + "step": 5088, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:37.263031", + "step": 5088, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001497715711593628, + "timestamp": "2025-09-10 02:26:37.267326", + "step": 5089, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:37.302313", + "step": 5089, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002114651957526803, + "timestamp": "2025-09-10 02:26:37.314889", + "step": 5090, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 15187581968384 + }, + "timestamp": "2025-09-10 02:26:37.356579", + "step": 5090, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011306433007121086, + "timestamp": "2025-09-10 02:26:37.374278", + "step": 5091, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:37.404869", + "step": 5091, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004676491022109985, + "timestamp": "2025-09-10 02:26:37.433034", + "step": 5092, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:37.462511", + "step": 5092, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011396221816539764, + "timestamp": "2025-09-10 02:26:37.467061", + "step": 5093, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:37.496773", + "step": 5093, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000759919814299792, + "timestamp": "2025-09-10 02:26:37.503790", + "step": 5094, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:37.535802", + "step": 5094, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020001002121716738, + "timestamp": "2025-09-10 02:26:37.542443", + "step": 5095, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:37.572089", + "step": 5095, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017582971486262977, + "timestamp": "2025-09-10 02:26:37.599644", + "step": 5096, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:37.630671", + "step": 5096, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004590843745972961, + "timestamp": "2025-09-10 02:26:37.635779", + "step": 5097, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:37.676345", + "step": 5097, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002961141581181437, + "timestamp": "2025-09-10 02:26:37.686453", + "step": 5098, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:26:37.726004", + "step": 5098, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022437986626755446, + "timestamp": "2025-09-10 02:26:37.739344", + "step": 5099, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:37.773356", + "step": 5099, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002960095298476517, + "timestamp": "2025-09-10 02:26:37.800799", + "step": 5100, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:37.831592", + "step": 5100, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015804110444150865, + "timestamp": "2025-09-10 02:26:37.836206", + "step": 5101, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:26:37.871853", + "step": 5101, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004349082300905138, + "timestamp": "2025-09-10 02:26:37.885536", + "step": 5102, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:37.915557", + "step": 5102, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009989996906369925, + "timestamp": "2025-09-10 02:26:37.922804", + "step": 5103, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:37.956305", + "step": 5103, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002965817984659225, + "timestamp": "2025-09-10 02:26:37.981392", + "step": 5104, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:38.019315", + "step": 5104, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011326335516059771, + "timestamp": "2025-09-10 02:26:38.027976", + "step": 5105, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:38.059644", + "step": 5105, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006430582143366337, + "timestamp": "2025-09-10 02:26:38.067309", + "step": 5106, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:38.103324", + "step": 5106, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03719168156385422, + "timestamp": "2025-09-10 02:26:38.113064", + "step": 5107, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:38.150802", + "step": 5107, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005424571572802961, + "timestamp": "2025-09-10 02:26:38.181667", + "step": 5108, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:38.214866", + "step": 5108, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010797369759529829, + "timestamp": "2025-09-10 02:26:38.219882", + "step": 5109, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:38.251030", + "step": 5109, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.883118425728753e-05, + "timestamp": "2025-09-10 02:26:38.263566", + "step": 5110, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:38.294698", + "step": 5110, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.636332571157254e-05, + "timestamp": "2025-09-10 02:26:38.301741", + "step": 5111, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:38.333874", + "step": 5111, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013791404489893466, + "timestamp": "2025-09-10 02:26:38.362134", + "step": 5112, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:38.402640", + "step": 5112, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002762853109743446, + "timestamp": "2025-09-10 02:26:38.407777", + "step": 5113, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:38.449345", + "step": 5113, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036431459011510015, + "timestamp": "2025-09-10 02:26:38.453859", + "step": 5114, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:38.484171", + "step": 5114, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002119356067851186, + "timestamp": "2025-09-10 02:26:38.490985", + "step": 5115, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:38.537020", + "step": 5115, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015236083418130875, + "timestamp": "2025-09-10 02:26:38.568091", + "step": 5116, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:38.600258", + "step": 5116, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002494109212420881, + "timestamp": "2025-09-10 02:26:38.610000", + "step": 5117, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:38.642033", + "step": 5117, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010412498522782698, + "timestamp": "2025-09-10 02:26:38.646738", + "step": 5118, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:38.678539", + "step": 5118, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.976993947522715e-05, + "timestamp": "2025-09-10 02:26:38.685400", + "step": 5119, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:38.718264", + "step": 5119, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002568106632679701, + "timestamp": "2025-09-10 02:26:38.745928", + "step": 5120, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:38.780243", + "step": 5120, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005041944095864892, + "timestamp": "2025-09-10 02:26:38.785032", + "step": 5121, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:38.817838", + "step": 5121, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011331056157359853, + "timestamp": "2025-09-10 02:26:38.829518", + "step": 5122, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:38.862267", + "step": 5122, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043031698442064226, + "timestamp": "2025-09-10 02:26:38.874856", + "step": 5123, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:38.908541", + "step": 5123, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002998805372044444, + "timestamp": "2025-09-10 02:26:38.936629", + "step": 5124, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:38.968508", + "step": 5124, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01266338862478733, + "timestamp": "2025-09-10 02:26:38.973088", + "step": 5125, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:39.003978", + "step": 5125, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002824350376613438, + "timestamp": "2025-09-10 02:26:39.010652", + "step": 5126, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:39.042165", + "step": 5126, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010247425962006673, + "timestamp": "2025-09-10 02:26:39.046697", + "step": 5127, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:39.081094", + "step": 5127, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008860399248078465, + "timestamp": "2025-09-10 02:26:39.108780", + "step": 5128, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:39.143111", + "step": 5128, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.454523049294949e-05, + "timestamp": "2025-09-10 02:26:39.147281", + "step": 5129, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:39.187542", + "step": 5129, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003048314538318664, + "timestamp": "2025-09-10 02:26:39.197351", + "step": 5130, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:39.230939", + "step": 5130, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018424120207782835, + "timestamp": "2025-09-10 02:26:39.240890", + "step": 5131, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:39.272716", + "step": 5131, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006012596422806382, + "timestamp": "2025-09-10 02:26:39.300808", + "step": 5132, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:39.332691", + "step": 5132, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003009784559253603, + "timestamp": "2025-09-10 02:26:39.339735", + "step": 5133, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:39.375609", + "step": 5133, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03187219798564911, + "timestamp": "2025-09-10 02:26:39.385234", + "step": 5134, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:39.423679", + "step": 5134, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010775693226605654, + "timestamp": "2025-09-10 02:26:39.434241", + "step": 5135, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:39.464830", + "step": 5135, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001194292795844376, + "timestamp": "2025-09-10 02:26:39.496349", + "step": 5136, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:39.527484", + "step": 5136, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019652375485748053, + "timestamp": "2025-09-10 02:26:39.535242", + "step": 5137, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:39.568984", + "step": 5137, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010311486199498177, + "timestamp": "2025-09-10 02:26:39.576682", + "step": 5138, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:39.606200", + "step": 5138, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019729572522919625, + "timestamp": "2025-09-10 02:26:39.613264", + "step": 5139, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:39.643471", + "step": 5139, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033027934841811657, + "timestamp": "2025-09-10 02:26:39.671867", + "step": 5140, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:39.703487", + "step": 5140, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022352919913828373, + "timestamp": "2025-09-10 02:26:39.705615", + "step": 5141, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:39.737702", + "step": 5141, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002084512962028384, + "timestamp": "2025-09-10 02:26:39.746825", + "step": 5142, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:39.778404", + "step": 5142, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020568576292134821, + "timestamp": "2025-09-10 02:26:39.789245", + "step": 5143, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:39.818748", + "step": 5143, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034350433270446956, + "timestamp": "2025-09-10 02:26:39.849832", + "step": 5144, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:39.882711", + "step": 5144, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.809032781049609e-05, + "timestamp": "2025-09-10 02:26:39.892207", + "step": 5145, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:26:50.094432", + "step": 5145, + "epoch": 3 + }, + { + "type": "pplx", + "content": 19013441.374623075, + "timestamp": "2025-09-10 02:26:50.098424", + "step": 5145, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:26:50.128744", + "step": 5145, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014527181629091501, + "timestamp": "2025-09-10 02:26:50.130787", + "step": 5146, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:50.162665", + "step": 5146, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.027358056977391243, + "timestamp": "2025-09-10 02:26:50.168867", + "step": 5147, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:50.202488", + "step": 5147, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004902129294350743, + "timestamp": "2025-09-10 02:26:50.229570", + "step": 5148, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:50.265151", + "step": 5148, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014729471877217293, + "timestamp": "2025-09-10 02:26:50.269830", + "step": 5149, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:50.302158", + "step": 5149, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003401133930310607, + "timestamp": "2025-09-10 02:26:50.309049", + "step": 5150, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:50.340073", + "step": 5150, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013149350706953555, + "timestamp": "2025-09-10 02:26:50.351481", + "step": 5151, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:26:50.385466", + "step": 5151, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01326068490743637, + "timestamp": "2025-09-10 02:26:50.419683", + "step": 5152, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:50.451962", + "step": 5152, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036567659117281437, + "timestamp": "2025-09-10 02:26:50.456231", + "step": 5153, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:50.488390", + "step": 5153, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013120094314217567, + "timestamp": "2025-09-10 02:26:50.495873", + "step": 5154, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:50.529639", + "step": 5154, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017451155872549862, + "timestamp": "2025-09-10 02:26:50.536633", + "step": 5155, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:50.567172", + "step": 5155, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037706297007389367, + "timestamp": "2025-09-10 02:26:50.592290", + "step": 5156, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:50.624924", + "step": 5156, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002998369454871863, + "timestamp": "2025-09-10 02:26:50.628990", + "step": 5157, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:50.661318", + "step": 5157, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006355083896778524, + "timestamp": "2025-09-10 02:26:50.665583", + "step": 5158, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:26:50.700149", + "step": 5158, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024739918299019337, + "timestamp": "2025-09-10 02:26:50.714091", + "step": 5159, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:50.747276", + "step": 5159, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006842431612312794, + "timestamp": "2025-09-10 02:26:50.772316", + "step": 5160, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:50.804366", + "step": 5160, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001715581602184102, + "timestamp": "2025-09-10 02:26:50.806614", + "step": 5161, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:50.838444", + "step": 5161, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001334312546532601, + "timestamp": "2025-09-10 02:26:50.850899", + "step": 5162, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:50.882059", + "step": 5162, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015925957122817636, + "timestamp": "2025-09-10 02:26:50.892413", + "step": 5163, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:50.924795", + "step": 5163, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.035536449402570724, + "timestamp": "2025-09-10 02:26:50.952247", + "step": 5164, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:50.984434", + "step": 5164, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.032359056174755096, + "timestamp": "2025-09-10 02:26:50.988981", + "step": 5165, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:51.019851", + "step": 5165, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012751846807077527, + "timestamp": "2025-09-10 02:26:51.024166", + "step": 5166, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:51.057009", + "step": 5166, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008924083085730672, + "timestamp": "2025-09-10 02:26:51.061492", + "step": 5167, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:51.094260", + "step": 5167, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009923691395670176, + "timestamp": "2025-09-10 02:26:51.127680", + "step": 5168, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:51.159718", + "step": 5168, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004293483681976795, + "timestamp": "2025-09-10 02:26:51.167509", + "step": 5169, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:51.198390", + "step": 5169, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001559741358505562, + "timestamp": "2025-09-10 02:26:51.202516", + "step": 5170, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:51.234801", + "step": 5170, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003882385208271444, + "timestamp": "2025-09-10 02:26:51.242426", + "step": 5171, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:51.274436", + "step": 5171, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005971429636701941, + "timestamp": "2025-09-10 02:26:51.299776", + "step": 5172, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:51.330997", + "step": 5172, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021882994042243809, + "timestamp": "2025-09-10 02:26:51.336487", + "step": 5173, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:51.367881", + "step": 5173, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011246565263718367, + "timestamp": "2025-09-10 02:26:51.375480", + "step": 5174, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:51.405682", + "step": 5174, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022782094310969114, + "timestamp": "2025-09-10 02:26:51.410086", + "step": 5175, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:51.442061", + "step": 5175, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00039846167783252895, + "timestamp": "2025-09-10 02:26:51.467694", + "step": 5176, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:26:51.501857", + "step": 5176, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024708619457669556, + "timestamp": "2025-09-10 02:26:51.514555", + "step": 5177, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:51.546565", + "step": 5177, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05931756645441055, + "timestamp": "2025-09-10 02:26:51.558679", + "step": 5178, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:51.590202", + "step": 5178, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005889888852834702, + "timestamp": "2025-09-10 02:26:51.594559", + "step": 5179, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:51.625517", + "step": 5179, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023044981935527176, + "timestamp": "2025-09-10 02:26:51.653377", + "step": 5180, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:51.684595", + "step": 5180, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009921352611854672, + "timestamp": "2025-09-10 02:26:51.689662", + "step": 5181, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:51.720180", + "step": 5181, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005538056720979512, + "timestamp": "2025-09-10 02:26:51.731255", + "step": 5182, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:51.762227", + "step": 5182, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023367933463305235, + "timestamp": "2025-09-10 02:26:51.772542", + "step": 5183, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:51.804192", + "step": 5183, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002975693787448108, + "timestamp": "2025-09-10 02:26:51.832091", + "step": 5184, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:51.864290", + "step": 5184, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006394670926965773, + "timestamp": "2025-09-10 02:26:51.871192", + "step": 5185, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:26:51.902617", + "step": 5185, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005975649692118168, + "timestamp": "2025-09-10 02:26:51.904913", + "step": 5186, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:51.937164", + "step": 5186, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004425121296662837, + "timestamp": "2025-09-10 02:26:51.943970", + "step": 5187, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:51.975672", + "step": 5187, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00035937223583459854, + "timestamp": "2025-09-10 02:26:52.003857", + "step": 5188, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:52.035467", + "step": 5188, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011359489290043712, + "timestamp": "2025-09-10 02:26:52.037712", + "step": 5189, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:52.068998", + "step": 5189, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003917438443750143, + "timestamp": "2025-09-10 02:26:52.075712", + "step": 5190, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:52.107588", + "step": 5190, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001755694829626009, + "timestamp": "2025-09-10 02:26:52.114296", + "step": 5191, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:52.146621", + "step": 5191, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007622348493896425, + "timestamp": "2025-09-10 02:26:52.174521", + "step": 5192, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:52.206264", + "step": 5192, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00104383728466928, + "timestamp": "2025-09-10 02:26:52.213874", + "step": 5193, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:52.246717", + "step": 5193, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010719360579969361, + "timestamp": "2025-09-10 02:26:52.250995", + "step": 5194, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:52.282112", + "step": 5194, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022146674746181816, + "timestamp": "2025-09-10 02:26:52.289447", + "step": 5195, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:52.321360", + "step": 5195, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000337046105414629, + "timestamp": "2025-09-10 02:26:52.349962", + "step": 5196, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:52.381024", + "step": 5196, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005232029943726957, + "timestamp": "2025-09-10 02:26:52.385715", + "step": 5197, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:52.416162", + "step": 5197, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024187321832869202, + "timestamp": "2025-09-10 02:26:52.426355", + "step": 5198, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:52.457633", + "step": 5198, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.026408951729536057, + "timestamp": "2025-09-10 02:26:52.462089", + "step": 5199, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:26:52.500650", + "step": 5199, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00909572746604681, + "timestamp": "2025-09-10 02:26:52.537239", + "step": 5200, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:52.568974", + "step": 5200, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032603865838609636, + "timestamp": "2025-09-10 02:26:52.573368", + "step": 5201, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:52.603891", + "step": 5201, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005753154866397381, + "timestamp": "2025-09-10 02:26:52.610681", + "step": 5202, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:52.641795", + "step": 5202, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010426250519230962, + "timestamp": "2025-09-10 02:26:52.649517", + "step": 5203, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:26:52.689718", + "step": 5203, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0064912172965705395, + "timestamp": "2025-09-10 02:26:52.726772", + "step": 5204, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:52.759662", + "step": 5204, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025196291971951723, + "timestamp": "2025-09-10 02:26:52.761919", + "step": 5205, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:26:52.796838", + "step": 5205, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011967868776991963, + "timestamp": "2025-09-10 02:26:52.810487", + "step": 5206, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:52.842346", + "step": 5206, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008071462507359684, + "timestamp": "2025-09-10 02:26:52.849531", + "step": 5207, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:52.881484", + "step": 5207, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036838999949395657, + "timestamp": "2025-09-10 02:26:52.908146", + "step": 5208, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:52.947984", + "step": 5208, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002956208190880716, + "timestamp": "2025-09-10 02:26:52.952213", + "step": 5209, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:26:52.984034", + "step": 5209, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003162443172186613, + "timestamp": "2025-09-10 02:26:52.986347", + "step": 5210, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:26:53.016915", + "step": 5210, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004851007543038577, + "timestamp": "2025-09-10 02:26:53.019203", + "step": 5211, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:26:53.057262", + "step": 5211, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001697681873338297, + "timestamp": "2025-09-10 02:26:53.094094", + "step": 5212, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:26:53.124931", + "step": 5212, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003544171922840178, + "timestamp": "2025-09-10 02:26:53.126906", + "step": 5213, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:53.158968", + "step": 5213, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007984244264662266, + "timestamp": "2025-09-10 02:26:53.163284", + "step": 5214, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:53.194990", + "step": 5214, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009763463167473674, + "timestamp": "2025-09-10 02:26:53.202091", + "step": 5215, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:53.233627", + "step": 5215, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.645112393423915e-05, + "timestamp": "2025-09-10 02:26:53.264071", + "step": 5216, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:53.296248", + "step": 5216, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014072064368519932, + "timestamp": "2025-09-10 02:26:53.300402", + "step": 5217, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:53.332383", + "step": 5217, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012747065164148808, + "timestamp": "2025-09-10 02:26:53.341941", + "step": 5218, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:26:53.375345", + "step": 5218, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011503745190566406, + "timestamp": "2025-09-10 02:26:53.389105", + "step": 5219, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:53.422086", + "step": 5219, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017217174172401428, + "timestamp": "2025-09-10 02:26:53.450612", + "step": 5220, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:53.482505", + "step": 5220, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002020140818785876, + "timestamp": "2025-09-10 02:26:53.487460", + "step": 5221, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:53.520956", + "step": 5221, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000206151555175893, + "timestamp": "2025-09-10 02:26:53.528561", + "step": 5222, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:53.560359", + "step": 5222, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019689316395670176, + "timestamp": "2025-09-10 02:26:53.571892", + "step": 5223, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:53.602541", + "step": 5223, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025810713414102793, + "timestamp": "2025-09-10 02:26:53.635699", + "step": 5224, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:26:53.671170", + "step": 5224, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005631653126329184, + "timestamp": "2025-09-10 02:26:53.684263", + "step": 5225, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:53.716685", + "step": 5225, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002063760912278667, + "timestamp": "2025-09-10 02:26:53.725994", + "step": 5226, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:53.757179", + "step": 5226, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010216274298727512, + "timestamp": "2025-09-10 02:26:53.764700", + "step": 5227, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:26:53.802766", + "step": 5227, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001996259670704603, + "timestamp": "2025-09-10 02:26:53.839561", + "step": 5228, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:53.869898", + "step": 5228, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011374764144420624, + "timestamp": "2025-09-10 02:26:53.874083", + "step": 5229, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:26:53.908877", + "step": 5229, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006331949844025075, + "timestamp": "2025-09-10 02:26:53.922647", + "step": 5230, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:26:53.955933", + "step": 5230, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007114322506822646, + "timestamp": "2025-09-10 02:26:53.969325", + "step": 5231, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:54.001297", + "step": 5231, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004758648574352264, + "timestamp": "2025-09-10 02:26:54.026232", + "step": 5232, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:54.058661", + "step": 5232, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021887525508645922, + "timestamp": "2025-09-10 02:26:54.066298", + "step": 5233, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:54.097606", + "step": 5233, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0062928879633545876, + "timestamp": "2025-09-10 02:26:54.101720", + "step": 5234, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:54.132929", + "step": 5234, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007375451386906207, + "timestamp": "2025-09-10 02:26:54.140376", + "step": 5235, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:54.173713", + "step": 5235, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021762121468782425, + "timestamp": "2025-09-10 02:26:54.198807", + "step": 5236, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:54.231943", + "step": 5236, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011317295720800757, + "timestamp": "2025-09-10 02:26:54.241667", + "step": 5237, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:54.275801", + "step": 5237, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016049954865593463, + "timestamp": "2025-09-10 02:26:54.283440", + "step": 5238, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:26:54.313598", + "step": 5238, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01704251952469349, + "timestamp": "2025-09-10 02:26:54.316005", + "step": 5239, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:54.348099", + "step": 5239, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001351992366835475, + "timestamp": "2025-09-10 02:26:54.376305", + "step": 5240, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:54.407460", + "step": 5240, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004043028748128563, + "timestamp": "2025-09-10 02:26:54.411662", + "step": 5241, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:54.442179", + "step": 5241, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005190826021134853, + "timestamp": "2025-09-10 02:26:54.451994", + "step": 5242, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:54.483998", + "step": 5242, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003823291917797178, + "timestamp": "2025-09-10 02:26:54.491018", + "step": 5243, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:54.521728", + "step": 5243, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018407927127555013, + "timestamp": "2025-09-10 02:26:54.550143", + "step": 5244, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:54.583304", + "step": 5244, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.925807636231184e-05, + "timestamp": "2025-09-10 02:26:54.592363", + "step": 5245, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:54.623476", + "step": 5245, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002199393231421709, + "timestamp": "2025-09-10 02:26:54.630980", + "step": 5246, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:26:54.672924", + "step": 5246, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037568985135294497, + "timestamp": "2025-09-10 02:26:54.690237", + "step": 5247, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:54.721466", + "step": 5247, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0037835666444152594, + "timestamp": "2025-09-10 02:26:54.749018", + "step": 5248, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:54.782708", + "step": 5248, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022347843332681805, + "timestamp": "2025-09-10 02:26:54.785142", + "step": 5249, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 688 + ], + "flops": 20408222954560 + }, + "timestamp": "2025-09-10 02:26:54.841596", + "step": 5249, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.557055505458266e-05, + "timestamp": "2025-09-10 02:26:54.865716", + "step": 5250, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:26:54.898591", + "step": 5250, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002831138903275132, + "timestamp": "2025-09-10 02:26:54.900995", + "step": 5251, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:54.932463", + "step": 5251, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007521641673520207, + "timestamp": "2025-09-10 02:26:54.960783", + "step": 5252, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:54.991912", + "step": 5252, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.602372301742435e-05, + "timestamp": "2025-09-10 02:26:54.996710", + "step": 5253, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:26:55.027852", + "step": 5253, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014525480568408966, + "timestamp": "2025-09-10 02:26:55.030309", + "step": 5254, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:55.061017", + "step": 5254, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.630308730062097e-05, + "timestamp": "2025-09-10 02:26:55.072516", + "step": 5255, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:26:55.104329", + "step": 5255, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013203138951212168, + "timestamp": "2025-09-10 02:26:55.132507", + "step": 5256, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:55.164602", + "step": 5256, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021953012328594923, + "timestamp": "2025-09-10 02:26:55.169052", + "step": 5257, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:26:55.206701", + "step": 5257, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.017087912186980247, + "timestamp": "2025-09-10 02:26:55.222355", + "step": 5258, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:55.253490", + "step": 5258, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040845529292710125, + "timestamp": "2025-09-10 02:26:55.260749", + "step": 5259, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 560 + ], + "flops": 16611393146432 + }, + "timestamp": "2025-09-10 02:26:55.308329", + "step": 5259, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.842081635724753e-05, + "timestamp": "2025-09-10 02:26:55.348588", + "step": 5260, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:55.379379", + "step": 5260, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008445015409961343, + "timestamp": "2025-09-10 02:26:55.383680", + "step": 5261, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:55.415441", + "step": 5261, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015750537568237633, + "timestamp": "2025-09-10 02:26:55.424919", + "step": 5262, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:55.456799", + "step": 5262, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009556380682624876, + "timestamp": "2025-09-10 02:26:55.463257", + "step": 5263, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:55.500776", + "step": 5263, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016424224304500967, + "timestamp": "2025-09-10 02:26:55.525896", + "step": 5264, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:55.557462", + "step": 5264, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011993583757430315, + "timestamp": "2025-09-10 02:26:55.567142", + "step": 5265, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:55.598056", + "step": 5265, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017937307711690664, + "timestamp": "2025-09-10 02:26:55.609593", + "step": 5266, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:55.641151", + "step": 5266, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018583576893433928, + "timestamp": "2025-09-10 02:26:55.647912", + "step": 5267, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:55.680586", + "step": 5267, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002548788907006383, + "timestamp": "2025-09-10 02:26:55.713197", + "step": 5268, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:55.744559", + "step": 5268, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015272719610948116, + "timestamp": "2025-09-10 02:26:55.748061", + "step": 5269, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:55.780263", + "step": 5269, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022310127678792924, + "timestamp": "2025-09-10 02:26:55.784449", + "step": 5270, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:55.814738", + "step": 5270, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000734326837118715, + "timestamp": "2025-09-10 02:26:55.818917", + "step": 5271, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:26:55.854007", + "step": 5271, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004884271766059101, + "timestamp": "2025-09-10 02:26:55.888955", + "step": 5272, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:55.920093", + "step": 5272, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021002234425395727, + "timestamp": "2025-09-10 02:26:55.922221", + "step": 5273, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:26:55.952507", + "step": 5273, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.44793731730897e-05, + "timestamp": "2025-09-10 02:26:55.956751", + "step": 5274, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:26:55.987419", + "step": 5274, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.469302312936634e-05, + "timestamp": "2025-09-10 02:26:55.994191", + "step": 5275, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:26:56.026153", + "step": 5275, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009514411212876439, + "timestamp": "2025-09-10 02:26:56.058722", + "step": 5276, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:26:56.090585", + "step": 5276, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.029165888321586e-05, + "timestamp": "2025-09-10 02:26:56.103707", + "step": 5277, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:56.134258", + "step": 5277, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03702101483941078, + "timestamp": "2025-09-10 02:26:56.138049", + "step": 5278, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:26:56.169637", + "step": 5278, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.557779463240877e-05, + "timestamp": "2025-09-10 02:26:56.172049", + "step": 5279, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:56.204160", + "step": 5279, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008645829511806369, + "timestamp": "2025-09-10 02:26:56.235523", + "step": 5280, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:56.267832", + "step": 5280, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013446787488646805, + "timestamp": "2025-09-10 02:26:56.271949", + "step": 5281, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:26:56.304519", + "step": 5281, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003033523913472891, + "timestamp": "2025-09-10 02:26:56.306908", + "step": 5282, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:56.337846", + "step": 5282, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018720559310168028, + "timestamp": "2025-09-10 02:26:56.341602", + "step": 5283, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:26:56.373785", + "step": 5283, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002793918247334659, + "timestamp": "2025-09-10 02:26:56.404155", + "step": 5284, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:26:56.438641", + "step": 5284, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05163096636533737, + "timestamp": "2025-09-10 02:26:56.441011", + "step": 5285, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:26:56.472033", + "step": 5285, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009458529762923717, + "timestamp": "2025-09-10 02:26:56.479305", + "step": 5286, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:26:56.520156", + "step": 5286, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008849184960126877, + "timestamp": "2025-09-10 02:26:56.537189", + "step": 5287, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:26:56.569025", + "step": 5287, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013038819888606668, + "timestamp": "2025-09-10 02:26:56.602016", + "step": 5288, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:56.633736", + "step": 5288, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04861394315958023, + "timestamp": "2025-09-10 02:26:56.638075", + "step": 5289, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:26:56.668284", + "step": 5289, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004373115487396717, + "timestamp": "2025-09-10 02:26:56.674804", + "step": 5290, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:26:56.707345", + "step": 5290, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00035963160917162895, + "timestamp": "2025-09-10 02:26:56.717659", + "step": 5291, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:26:56.750783", + "step": 5291, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020156674145255238, + "timestamp": "2025-09-10 02:26:56.785068", + "step": 5292, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:27:06.874113", + "step": 5292, + "epoch": 3 + }, + { + "type": "pplx", + "content": 19403711.464340024, + "timestamp": "2025-09-10 02:27:06.877312", + "step": 5292, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:06.907901", + "step": 5292, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023475231137126684, + "timestamp": "2025-09-10 02:27:06.909938", + "step": 5293, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:06.942794", + "step": 5293, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001284535537706688, + "timestamp": "2025-09-10 02:27:06.952035", + "step": 5294, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:06.983428", + "step": 5294, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003995387349277735, + "timestamp": "2025-09-10 02:27:06.991032", + "step": 5295, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:07.022414", + "step": 5295, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036020742845721543, + "timestamp": "2025-09-10 02:27:07.047476", + "step": 5296, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:07.079413", + "step": 5296, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002324790257262066, + "timestamp": "2025-09-10 02:27:07.081576", + "step": 5297, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:07.113372", + "step": 5297, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002664544736035168, + "timestamp": "2025-09-10 02:27:07.125753", + "step": 5298, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:07.158414", + "step": 5298, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004768196027725935, + "timestamp": "2025-09-10 02:27:07.165119", + "step": 5299, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:07.197411", + "step": 5299, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005364773096516728, + "timestamp": "2025-09-10 02:27:07.228444", + "step": 5300, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:07.260805", + "step": 5300, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005759544670581818, + "timestamp": "2025-09-10 02:27:07.264918", + "step": 5301, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:07.296947", + "step": 5301, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015448706690222025, + "timestamp": "2025-09-10 02:27:07.304275", + "step": 5302, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 816 + ], + "flops": 24205052762688 + }, + "timestamp": "2025-09-10 02:27:07.372258", + "step": 5302, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.88330357358791e-05, + "timestamp": "2025-09-10 02:27:07.400778", + "step": 5303, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:07.433304", + "step": 5303, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007378348964266479, + "timestamp": "2025-09-10 02:27:07.461302", + "step": 5304, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:07.494345", + "step": 5304, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042900207336060703, + "timestamp": "2025-09-10 02:27:07.500713", + "step": 5305, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:07.533228", + "step": 5305, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002336119068786502, + "timestamp": "2025-09-10 02:27:07.544426", + "step": 5306, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:07.576992", + "step": 5306, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.151837053243071e-05, + "timestamp": "2025-09-10 02:27:07.580801", + "step": 5307, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:07.612631", + "step": 5307, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003323144337628037, + "timestamp": "2025-09-10 02:27:07.640830", + "step": 5308, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:07.671961", + "step": 5308, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012035273248329759, + "timestamp": "2025-09-10 02:27:07.674138", + "step": 5309, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:27:07.709551", + "step": 5309, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002544188464526087, + "timestamp": "2025-09-10 02:27:07.723594", + "step": 5310, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:07.755728", + "step": 5310, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003566377272363752, + "timestamp": "2025-09-10 02:27:07.762625", + "step": 5311, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:07.793161", + "step": 5311, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005627021309919655, + "timestamp": "2025-09-10 02:27:07.821649", + "step": 5312, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:07.853329", + "step": 5312, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019646212458610535, + "timestamp": "2025-09-10 02:27:07.858432", + "step": 5313, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:07.889314", + "step": 5313, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013626412488520145, + "timestamp": "2025-09-10 02:27:07.896849", + "step": 5314, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:07.927894", + "step": 5314, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002811032463796437, + "timestamp": "2025-09-10 02:27:07.934716", + "step": 5315, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:07.968017", + "step": 5315, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022139211068861187, + "timestamp": "2025-09-10 02:27:07.995554", + "step": 5316, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:08.029335", + "step": 5316, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019662485283333808, + "timestamp": "2025-09-10 02:27:08.038048", + "step": 5317, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:08.069272", + "step": 5317, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002974488597828895, + "timestamp": "2025-09-10 02:27:08.081644", + "step": 5318, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:27:08.116905", + "step": 5318, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016499229241162539, + "timestamp": "2025-09-10 02:27:08.130677", + "step": 5319, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:08.162174", + "step": 5319, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001375975989503786, + "timestamp": "2025-09-10 02:27:08.189861", + "step": 5320, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:08.221651", + "step": 5320, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034185725962743163, + "timestamp": "2025-09-10 02:27:08.229769", + "step": 5321, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:08.262367", + "step": 5321, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.766142592416145e-05, + "timestamp": "2025-09-10 02:27:08.266626", + "step": 5322, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:08.297298", + "step": 5322, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020192751253489405, + "timestamp": "2025-09-10 02:27:08.304114", + "step": 5323, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:08.334710", + "step": 5323, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017714654677547514, + "timestamp": "2025-09-10 02:27:08.362803", + "step": 5324, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:08.393965", + "step": 5324, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014676746213808656, + "timestamp": "2025-09-10 02:27:08.399243", + "step": 5325, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:08.432061", + "step": 5325, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007701607537455857, + "timestamp": "2025-09-10 02:27:08.442083", + "step": 5326, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:27:08.485836", + "step": 5326, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003435301478020847, + "timestamp": "2025-09-10 02:27:08.499222", + "step": 5327, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:08.533247", + "step": 5327, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.765466918703169e-05, + "timestamp": "2025-09-10 02:27:08.565826", + "step": 5328, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:08.596972", + "step": 5328, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012318461085669696, + "timestamp": "2025-09-10 02:27:08.599247", + "step": 5329, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:27:08.633232", + "step": 5329, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016483607469126582, + "timestamp": "2025-09-10 02:27:08.646899", + "step": 5330, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:08.677609", + "step": 5330, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008386906119994819, + "timestamp": "2025-09-10 02:27:08.684996", + "step": 5331, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:08.718082", + "step": 5331, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.022822031402029e-05, + "timestamp": "2025-09-10 02:27:08.746436", + "step": 5332, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:08.779520", + "step": 5332, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04045605659484863, + "timestamp": "2025-09-10 02:27:08.784700", + "step": 5333, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:08.821111", + "step": 5333, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.697760495124385e-05, + "timestamp": "2025-09-10 02:27:08.830598", + "step": 5334, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:08.863455", + "step": 5334, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010809339582920074, + "timestamp": "2025-09-10 02:27:08.874263", + "step": 5335, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:08.905338", + "step": 5335, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.760713272844441e-05, + "timestamp": "2025-09-10 02:27:08.931014", + "step": 5336, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:08.966761", + "step": 5336, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.413155668065883e-05, + "timestamp": "2025-09-10 02:27:08.972108", + "step": 5337, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:27:09.005498", + "step": 5337, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001805043575586751, + "timestamp": "2025-09-10 02:27:09.009564", + "step": 5338, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:09.043427", + "step": 5338, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0475006178021431, + "timestamp": "2025-09-10 02:27:09.050181", + "step": 5339, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:09.085556", + "step": 5339, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001535638002678752, + "timestamp": "2025-09-10 02:27:09.110509", + "step": 5340, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:09.141774", + "step": 5340, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0067802309058606625, + "timestamp": "2025-09-10 02:27:09.146917", + "step": 5341, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:09.186897", + "step": 5341, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02059916779398918, + "timestamp": "2025-09-10 02:27:09.195909", + "step": 5342, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:09.228865", + "step": 5342, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022535616881214082, + "timestamp": "2025-09-10 02:27:09.235276", + "step": 5343, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:09.270492", + "step": 5343, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002255578147014603, + "timestamp": "2025-09-10 02:27:09.301963", + "step": 5344, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:09.338400", + "step": 5344, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.521532436134294e-05, + "timestamp": "2025-09-10 02:27:09.341414", + "step": 5345, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:09.380109", + "step": 5345, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.905913349124603e-05, + "timestamp": "2025-09-10 02:27:09.386648", + "step": 5346, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:09.421685", + "step": 5346, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005463002598844469, + "timestamp": "2025-09-10 02:27:09.428072", + "step": 5347, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:09.461412", + "step": 5347, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023405032698065042, + "timestamp": "2025-09-10 02:27:09.488946", + "step": 5348, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:09.524728", + "step": 5348, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009855531388893723, + "timestamp": "2025-09-10 02:27:09.528887", + "step": 5349, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:09.560327", + "step": 5349, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001291104854317382, + "timestamp": "2025-09-10 02:27:09.572055", + "step": 5350, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:09.603665", + "step": 5350, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029408042319118977, + "timestamp": "2025-09-10 02:27:09.610845", + "step": 5351, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:27:09.641288", + "step": 5351, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007272699382156134, + "timestamp": "2025-09-10 02:27:09.665079", + "step": 5352, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:09.696094", + "step": 5352, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021549421944655478, + "timestamp": "2025-09-10 02:27:09.700554", + "step": 5353, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:09.731962", + "step": 5353, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029168991022743285, + "timestamp": "2025-09-10 02:27:09.744166", + "step": 5354, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:09.777081", + "step": 5354, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001440942141925916, + "timestamp": "2025-09-10 02:27:09.789247", + "step": 5355, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:09.820693", + "step": 5355, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006720181554555893, + "timestamp": "2025-09-10 02:27:09.848205", + "step": 5356, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:09.881714", + "step": 5356, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011303488863632083, + "timestamp": "2025-09-10 02:27:09.886340", + "step": 5357, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:09.918098", + "step": 5357, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002597134152892977, + "timestamp": "2025-09-10 02:27:09.929734", + "step": 5358, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:09.963169", + "step": 5358, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024505850160494447, + "timestamp": "2025-09-10 02:27:09.967403", + "step": 5359, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:09.998959", + "step": 5359, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003296424984000623, + "timestamp": "2025-09-10 02:27:10.024044", + "step": 5360, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:10.055241", + "step": 5360, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009543310734443367, + "timestamp": "2025-09-10 02:27:10.057851", + "step": 5361, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:10.091168", + "step": 5361, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012925105693284422, + "timestamp": "2025-09-10 02:27:10.098638", + "step": 5362, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:10.130346", + "step": 5362, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022506927780341357, + "timestamp": "2025-09-10 02:27:10.137015", + "step": 5363, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:27:10.175004", + "step": 5363, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027559816953726113, + "timestamp": "2025-09-10 02:27:10.211508", + "step": 5364, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:10.243345", + "step": 5364, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.342889148276299e-05, + "timestamp": "2025-09-10 02:27:10.247674", + "step": 5365, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:10.279417", + "step": 5365, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005296029266901314, + "timestamp": "2025-09-10 02:27:10.286937", + "step": 5366, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:27:10.321567", + "step": 5366, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015757219807710499, + "timestamp": "2025-09-10 02:27:10.335385", + "step": 5367, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:10.367196", + "step": 5367, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003111243713647127, + "timestamp": "2025-09-10 02:27:10.394588", + "step": 5368, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:10.427238", + "step": 5368, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011776711326092482, + "timestamp": "2025-09-10 02:27:10.433995", + "step": 5369, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:10.465234", + "step": 5369, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025284269941039383, + "timestamp": "2025-09-10 02:27:10.477006", + "step": 5370, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:10.508081", + "step": 5370, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006649411516264081, + "timestamp": "2025-09-10 02:27:10.520422", + "step": 5371, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:10.551453", + "step": 5371, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001548093743622303, + "timestamp": "2025-09-10 02:27:10.579634", + "step": 5372, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:10.611150", + "step": 5372, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003271247842349112, + "timestamp": "2025-09-10 02:27:10.615775", + "step": 5373, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:10.647350", + "step": 5373, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005300581920892, + "timestamp": "2025-09-10 02:27:10.654716", + "step": 5374, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:10.687559", + "step": 5374, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005442704423330724, + "timestamp": "2025-09-10 02:27:10.694191", + "step": 5375, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:10.727002", + "step": 5375, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001799498131731525, + "timestamp": "2025-09-10 02:27:10.754945", + "step": 5376, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:10.788594", + "step": 5376, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002708194369915873, + "timestamp": "2025-09-10 02:27:10.793486", + "step": 5377, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:10.825847", + "step": 5377, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004976264201104641, + "timestamp": "2025-09-10 02:27:10.837370", + "step": 5378, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:10.868671", + "step": 5378, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040135084418579936, + "timestamp": "2025-09-10 02:27:10.875444", + "step": 5379, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:10.909459", + "step": 5379, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001543848484288901, + "timestamp": "2025-09-10 02:27:10.934664", + "step": 5380, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:10.967418", + "step": 5380, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.021023396402597427, + "timestamp": "2025-09-10 02:27:10.974542", + "step": 5381, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:11.006146", + "step": 5381, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035028725396841764, + "timestamp": "2025-09-10 02:27:11.009911", + "step": 5382, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:11.041403", + "step": 5382, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032905121333897114, + "timestamp": "2025-09-10 02:27:11.048083", + "step": 5383, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:11.079922", + "step": 5383, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008346849936060607, + "timestamp": "2025-09-10 02:27:11.108219", + "step": 5384, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:11.139874", + "step": 5384, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020959800167474896, + "timestamp": "2025-09-10 02:27:11.144722", + "step": 5385, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:11.176522", + "step": 5385, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.455058559775352e-05, + "timestamp": "2025-09-10 02:27:11.186299", + "step": 5386, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:11.217775", + "step": 5386, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021998195734340698, + "timestamp": "2025-09-10 02:27:11.224526", + "step": 5387, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:27:11.264673", + "step": 5387, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.021652230992913246, + "timestamp": "2025-09-10 02:27:11.301731", + "step": 5388, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:11.334326", + "step": 5388, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016941409558057785, + "timestamp": "2025-09-10 02:27:11.338505", + "step": 5389, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:27:11.369358", + "step": 5389, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002850510645657778, + "timestamp": "2025-09-10 02:27:11.372473", + "step": 5390, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:11.403900", + "step": 5390, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029228352941572666, + "timestamp": "2025-09-10 02:27:11.410340", + "step": 5391, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:11.441958", + "step": 5391, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01626124419271946, + "timestamp": "2025-09-10 02:27:11.469556", + "step": 5392, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:11.503536", + "step": 5392, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001065724529325962, + "timestamp": "2025-09-10 02:27:11.512032", + "step": 5393, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:27:11.549882", + "step": 5393, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003987684322055429, + "timestamp": "2025-09-10 02:27:11.563574", + "step": 5394, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:11.595440", + "step": 5394, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007257405668497086, + "timestamp": "2025-09-10 02:27:11.599672", + "step": 5395, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:11.631068", + "step": 5395, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003991567995399237, + "timestamp": "2025-09-10 02:27:11.658897", + "step": 5396, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:11.689820", + "step": 5396, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001748523791320622, + "timestamp": "2025-09-10 02:27:11.695212", + "step": 5397, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:11.728539", + "step": 5397, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017363729421049356, + "timestamp": "2025-09-10 02:27:11.735372", + "step": 5398, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:27:11.774608", + "step": 5398, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008585135801695287, + "timestamp": "2025-09-10 02:27:11.790251", + "step": 5399, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:11.821849", + "step": 5399, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017365036183036864, + "timestamp": "2025-09-10 02:27:11.850439", + "step": 5400, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:11.881998", + "step": 5400, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003011637891177088, + "timestamp": "2025-09-10 02:27:11.886504", + "step": 5401, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:11.917983", + "step": 5401, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03174243122339249, + "timestamp": "2025-09-10 02:27:11.925005", + "step": 5402, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:11.956591", + "step": 5402, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016650068573653698, + "timestamp": "2025-09-10 02:27:11.963318", + "step": 5403, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:11.995075", + "step": 5403, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002632165269460529, + "timestamp": "2025-09-10 02:27:12.027740", + "step": 5404, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:12.059862", + "step": 5404, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.19372255844064e-05, + "timestamp": "2025-09-10 02:27:12.063799", + "step": 5405, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:12.096073", + "step": 5405, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001446278765797615, + "timestamp": "2025-09-10 02:27:12.102727", + "step": 5406, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:12.134862", + "step": 5406, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006326402653940022, + "timestamp": "2025-09-10 02:27:12.141586", + "step": 5407, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:12.174294", + "step": 5407, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.538559970678762e-05, + "timestamp": "2025-09-10 02:27:12.202505", + "step": 5408, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:12.234243", + "step": 5408, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002074205782264471, + "timestamp": "2025-09-10 02:27:12.238921", + "step": 5409, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:27:12.278081", + "step": 5409, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004352888558059931, + "timestamp": "2025-09-10 02:27:12.294251", + "step": 5410, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:12.325875", + "step": 5410, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003916619752999395, + "timestamp": "2025-09-10 02:27:12.336292", + "step": 5411, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:12.367817", + "step": 5411, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.0024420236004516e-05, + "timestamp": "2025-09-10 02:27:12.395910", + "step": 5412, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:12.427527", + "step": 5412, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003417480329517275, + "timestamp": "2025-09-10 02:27:12.432346", + "step": 5413, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:12.464295", + "step": 5413, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04961821064352989, + "timestamp": "2025-09-10 02:27:12.470898", + "step": 5414, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:12.503543", + "step": 5414, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000651273294351995, + "timestamp": "2025-09-10 02:27:12.515505", + "step": 5415, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:12.547695", + "step": 5415, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005461532273329794, + "timestamp": "2025-09-10 02:27:12.574876", + "step": 5416, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:12.609568", + "step": 5416, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011130100028822199, + "timestamp": "2025-09-10 02:27:12.617143", + "step": 5417, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:12.648604", + "step": 5417, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004985901992768049, + "timestamp": "2025-09-10 02:27:12.655668", + "step": 5418, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:12.687344", + "step": 5418, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047179448301903903, + "timestamp": "2025-09-10 02:27:12.699601", + "step": 5419, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:12.732923", + "step": 5419, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04157213494181633, + "timestamp": "2025-09-10 02:27:12.757955", + "step": 5420, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:12.789802", + "step": 5420, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000143712037242949, + "timestamp": "2025-09-10 02:27:12.799045", + "step": 5421, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:27:12.832584", + "step": 5421, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028550736606121063, + "timestamp": "2025-09-10 02:27:12.845910", + "step": 5422, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:12.878184", + "step": 5422, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014410755829885602, + "timestamp": "2025-09-10 02:27:12.885163", + "step": 5423, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:12.917557", + "step": 5423, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013774879334960133, + "timestamp": "2025-09-10 02:27:12.941980", + "step": 5424, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:27:12.974130", + "step": 5424, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003147267270833254, + "timestamp": "2025-09-10 02:27:12.986753", + "step": 5425, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:27:13.019181", + "step": 5425, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0036901801358908415, + "timestamp": "2025-09-10 02:27:13.021451", + "step": 5426, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:13.052834", + "step": 5426, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.131115803029388e-05, + "timestamp": "2025-09-10 02:27:13.062813", + "step": 5427, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:13.094571", + "step": 5427, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034638322540558875, + "timestamp": "2025-09-10 02:27:13.121982", + "step": 5428, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:13.152789", + "step": 5428, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003097867302130908, + "timestamp": "2025-09-10 02:27:13.157434", + "step": 5429, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:13.188718", + "step": 5429, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006205638055689633, + "timestamp": "2025-09-10 02:27:13.198938", + "step": 5430, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:27:13.234382", + "step": 5430, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004306466784328222, + "timestamp": "2025-09-10 02:27:13.248054", + "step": 5431, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:13.280356", + "step": 5431, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013333893148228526, + "timestamp": "2025-09-10 02:27:13.311264", + "step": 5432, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:27:13.348541", + "step": 5432, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003753043944016099, + "timestamp": "2025-09-10 02:27:13.363655", + "step": 5433, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:13.395676", + "step": 5433, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018160228501074016, + "timestamp": "2025-09-10 02:27:13.406258", + "step": 5434, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:13.437458", + "step": 5434, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016118159983307123, + "timestamp": "2025-09-10 02:27:13.444359", + "step": 5435, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:13.475084", + "step": 5435, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042566441697999835, + "timestamp": "2025-09-10 02:27:13.506907", + "step": 5436, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:13.538508", + "step": 5436, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005374281900003552, + "timestamp": "2025-09-10 02:27:13.542905", + "step": 5437, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:13.573793", + "step": 5437, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007307881605811417, + "timestamp": "2025-09-10 02:27:13.580832", + "step": 5438, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:13.611475", + "step": 5438, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004055744793731719, + "timestamp": "2025-09-10 02:27:13.615784", + "step": 5439, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:27:23.615052", + "step": 5439, + "epoch": 3 + }, + { + "type": "pplx", + "content": 20426999.100602426, + "timestamp": "2025-09-10 02:27:23.619545", + "step": 5439, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:23.651955", + "step": 5439, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016575765039306134, + "timestamp": "2025-09-10 02:27:23.676033", + "step": 5440, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:23.705982", + "step": 5440, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043351706699468195, + "timestamp": "2025-09-10 02:27:23.707867", + "step": 5441, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:23.736882", + "step": 5441, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011763531947508454, + "timestamp": "2025-09-10 02:27:23.741399", + "step": 5442, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:27:23.779772", + "step": 5442, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002184472105000168, + "timestamp": "2025-09-10 02:27:23.795688", + "step": 5443, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:23.826560", + "step": 5443, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006947829853743315, + "timestamp": "2025-09-10 02:27:23.853998", + "step": 5444, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:23.884265", + "step": 5444, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013986548583488911, + "timestamp": "2025-09-10 02:27:23.894618", + "step": 5445, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:23.925075", + "step": 5445, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001293918932788074, + "timestamp": "2025-09-10 02:27:23.932472", + "step": 5446, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:23.962245", + "step": 5446, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006181861972436309, + "timestamp": "2025-09-10 02:27:23.973063", + "step": 5447, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:24.002436", + "step": 5447, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000459133880212903, + "timestamp": "2025-09-10 02:27:24.027256", + "step": 5448, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:24.057967", + "step": 5448, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009505641646683216, + "timestamp": "2025-09-10 02:27:24.062630", + "step": 5449, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:27:24.095581", + "step": 5449, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008678924641571939, + "timestamp": "2025-09-10 02:27:24.108930", + "step": 5450, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:27:24.147699", + "step": 5450, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002460898831486702, + "timestamp": "2025-09-10 02:27:24.163384", + "step": 5451, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:24.193907", + "step": 5451, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009736836655065417, + "timestamp": "2025-09-10 02:27:24.222218", + "step": 5452, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:24.251808", + "step": 5452, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001476912439102307, + "timestamp": "2025-09-10 02:27:24.257124", + "step": 5453, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:27:24.287170", + "step": 5453, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00234299642033875, + "timestamp": "2025-09-10 02:27:24.289140", + "step": 5454, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:24.319252", + "step": 5454, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003727012954186648, + "timestamp": "2025-09-10 02:27:24.323506", + "step": 5455, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:24.353122", + "step": 5455, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016866849910002202, + "timestamp": "2025-09-10 02:27:24.380665", + "step": 5456, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:24.411019", + "step": 5456, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030947296181693673, + "timestamp": "2025-09-10 02:27:24.413046", + "step": 5457, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:24.443865", + "step": 5457, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004413680580910295, + "timestamp": "2025-09-10 02:27:24.448059", + "step": 5458, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:24.481001", + "step": 5458, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032807476236484945, + "timestamp": "2025-09-10 02:27:24.485453", + "step": 5459, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:24.515733", + "step": 5459, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025154382456094027, + "timestamp": "2025-09-10 02:27:24.549197", + "step": 5460, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:24.585862", + "step": 5460, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001103718881495297, + "timestamp": "2025-09-10 02:27:24.590640", + "step": 5461, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:24.621484", + "step": 5461, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004202440322842449, + "timestamp": "2025-09-10 02:27:24.631686", + "step": 5462, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:24.662057", + "step": 5462, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002868052397388965, + "timestamp": "2025-09-10 02:27:24.669186", + "step": 5463, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:27:24.704523", + "step": 5463, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005337927141226828, + "timestamp": "2025-09-10 02:27:24.738823", + "step": 5464, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:24.773003", + "step": 5464, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000878959137480706, + "timestamp": "2025-09-10 02:27:24.777704", + "step": 5465, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:24.807633", + "step": 5465, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041015897295437753, + "timestamp": "2025-09-10 02:27:24.814729", + "step": 5466, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:24.845250", + "step": 5466, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008768526837229729, + "timestamp": "2025-09-10 02:27:24.856091", + "step": 5467, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:24.886823", + "step": 5467, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015605808584950864, + "timestamp": "2025-09-10 02:27:24.918482", + "step": 5468, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:24.948828", + "step": 5468, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048326136311516166, + "timestamp": "2025-09-10 02:27:24.954048", + "step": 5469, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:24.983806", + "step": 5469, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007095988839864731, + "timestamp": "2025-09-10 02:27:24.988163", + "step": 5470, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:25.018745", + "step": 5470, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0030129605438560247, + "timestamp": "2025-09-10 02:27:25.026198", + "step": 5471, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:27:25.068239", + "step": 5471, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008913605473935604, + "timestamp": "2025-09-10 02:27:25.106488", + "step": 5472, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:25.137125", + "step": 5472, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003532721020746976, + "timestamp": "2025-09-10 02:27:25.147566", + "step": 5473, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:25.177878", + "step": 5473, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001848796382546425, + "timestamp": "2025-09-10 02:27:25.184808", + "step": 5474, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:25.214781", + "step": 5474, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004907096736133099, + "timestamp": "2025-09-10 02:27:25.221615", + "step": 5475, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:25.252159", + "step": 5475, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004714152601081878, + "timestamp": "2025-09-10 02:27:25.285615", + "step": 5476, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:27:25.318176", + "step": 5476, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014040955575183034, + "timestamp": "2025-09-10 02:27:25.331456", + "step": 5477, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:25.361847", + "step": 5477, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022547480184584856, + "timestamp": "2025-09-10 02:27:25.368579", + "step": 5478, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:25.399396", + "step": 5478, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004808087833225727, + "timestamp": "2025-09-10 02:27:25.406197", + "step": 5479, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:25.436828", + "step": 5479, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024939377908594906, + "timestamp": "2025-09-10 02:27:25.464613", + "step": 5480, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:27:25.500516", + "step": 5480, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003376358072273433, + "timestamp": "2025-09-10 02:27:25.515696", + "step": 5481, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:25.545597", + "step": 5481, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017180813301820308, + "timestamp": "2025-09-10 02:27:25.552497", + "step": 5482, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:27:25.592196", + "step": 5482, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040549953700974584, + "timestamp": "2025-09-10 02:27:25.594957", + "step": 5483, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:25.625545", + "step": 5483, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006368904723785818, + "timestamp": "2025-09-10 02:27:25.658634", + "step": 5484, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:25.689629", + "step": 5484, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018317217472940683, + "timestamp": "2025-09-10 02:27:25.694781", + "step": 5485, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:25.724875", + "step": 5485, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001097510103136301, + "timestamp": "2025-09-10 02:27:25.728890", + "step": 5486, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:25.760354", + "step": 5486, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001433340657968074, + "timestamp": "2025-09-10 02:27:25.772468", + "step": 5487, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:25.802978", + "step": 5487, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005603828467428684, + "timestamp": "2025-09-10 02:27:25.831313", + "step": 5488, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:25.861933", + "step": 5488, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009804172441363335, + "timestamp": "2025-09-10 02:27:25.867054", + "step": 5489, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:25.896960", + "step": 5489, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.029433060437440872, + "timestamp": "2025-09-10 02:27:25.901378", + "step": 5490, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:25.932233", + "step": 5490, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031838512513786554, + "timestamp": "2025-09-10 02:27:25.938904", + "step": 5491, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:25.971123", + "step": 5491, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012118567246943712, + "timestamp": "2025-09-10 02:27:26.003044", + "step": 5492, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:26.033688", + "step": 5492, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.553322211606428e-05, + "timestamp": "2025-09-10 02:27:26.038348", + "step": 5493, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:26.068370", + "step": 5493, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006994387367740273, + "timestamp": "2025-09-10 02:27:26.075270", + "step": 5494, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:26.105413", + "step": 5494, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013065806706435978, + "timestamp": "2025-09-10 02:27:26.113048", + "step": 5495, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:27:26.147661", + "step": 5495, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004590437456499785, + "timestamp": "2025-09-10 02:27:26.182277", + "step": 5496, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:26.214107", + "step": 5496, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011019222438335419, + "timestamp": "2025-09-10 02:27:26.218755", + "step": 5497, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:27:26.256942", + "step": 5497, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003960966714657843, + "timestamp": "2025-09-10 02:27:26.272845", + "step": 5498, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:26.303273", + "step": 5498, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002474244683980942, + "timestamp": "2025-09-10 02:27:26.315523", + "step": 5499, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:27:26.352576", + "step": 5499, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.017456304281949997, + "timestamp": "2025-09-10 02:27:26.389120", + "step": 5500, + "epoch": 3 + }, + { + "type": "info", + "content": "Checkpoint saved at step 5500", + "timestamp": "2025-09-10 02:27:31.536850", + "step": 5500, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:31.579077", + "step": 5500, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002106861909851432, + "timestamp": "2025-09-10 02:27:31.582270", + "step": 5501, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:27:31.629193", + "step": 5501, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021317604114301503, + "timestamp": "2025-09-10 02:27:31.642886", + "step": 5502, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:31.674071", + "step": 5502, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021606599329970777, + "timestamp": "2025-09-10 02:27:31.680617", + "step": 5503, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:31.710421", + "step": 5503, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007330483640544116, + "timestamp": "2025-09-10 02:27:31.737988", + "step": 5504, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 15662185694400 + }, + "timestamp": "2025-09-10 02:27:31.781738", + "step": 5504, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000325193686876446, + "timestamp": "2025-09-10 02:27:31.800720", + "step": 5505, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:31.833182", + "step": 5505, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002977077674586326, + "timestamp": "2025-09-10 02:27:31.840308", + "step": 5506, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:31.871361", + "step": 5506, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009078510920517147, + "timestamp": "2025-09-10 02:27:31.878302", + "step": 5507, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:31.909839", + "step": 5507, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005771205294877291, + "timestamp": "2025-09-10 02:27:31.938113", + "step": 5508, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:31.968598", + "step": 5508, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.6392182159470394e-05, + "timestamp": "2025-09-10 02:27:31.972981", + "step": 5509, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:27:32.006458", + "step": 5509, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001654118241276592, + "timestamp": "2025-09-10 02:27:32.019865", + "step": 5510, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:27:32.059249", + "step": 5510, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002515481901355088, + "timestamp": "2025-09-10 02:27:32.072881", + "step": 5511, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:32.104293", + "step": 5511, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001816750009311363, + "timestamp": "2025-09-10 02:27:32.132676", + "step": 5512, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:32.162778", + "step": 5512, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022820988669991493, + "timestamp": "2025-09-10 02:27:32.164857", + "step": 5513, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:27:32.194948", + "step": 5513, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005616779671981931, + "timestamp": "2025-09-10 02:27:32.197489", + "step": 5514, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:27:32.227397", + "step": 5514, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.715394895058125e-05, + "timestamp": "2025-09-10 02:27:32.229522", + "step": 5515, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:32.259843", + "step": 5515, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003506176406517625, + "timestamp": "2025-09-10 02:27:32.293269", + "step": 5516, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:27:32.323168", + "step": 5516, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001584869751241058, + "timestamp": "2025-09-10 02:27:32.325243", + "step": 5517, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:27:32.358806", + "step": 5517, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006221079966053367, + "timestamp": "2025-09-10 02:27:32.372521", + "step": 5518, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:32.403453", + "step": 5518, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.248010372975841e-05, + "timestamp": "2025-09-10 02:27:32.410382", + "step": 5519, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:32.440771", + "step": 5519, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002929776383098215, + "timestamp": "2025-09-10 02:27:32.472429", + "step": 5520, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:32.506997", + "step": 5520, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004986113053746521, + "timestamp": "2025-09-10 02:27:32.508976", + "step": 5521, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:32.539000", + "step": 5521, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017840255168266594, + "timestamp": "2025-09-10 02:27:32.545758", + "step": 5522, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:32.578145", + "step": 5522, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.241130515467376e-05, + "timestamp": "2025-09-10 02:27:32.584978", + "step": 5523, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:27:32.618132", + "step": 5523, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008913822821341455, + "timestamp": "2025-09-10 02:27:32.641737", + "step": 5524, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:32.672034", + "step": 5524, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020360689086373895, + "timestamp": "2025-09-10 02:27:32.677021", + "step": 5525, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:27:32.710737", + "step": 5525, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012081407476216555, + "timestamp": "2025-09-10 02:27:32.724427", + "step": 5526, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:32.754305", + "step": 5526, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008983220905065536, + "timestamp": "2025-09-10 02:27:32.761358", + "step": 5527, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:32.791412", + "step": 5527, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018443951557856053, + "timestamp": "2025-09-10 02:27:32.816320", + "step": 5528, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:32.847149", + "step": 5528, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016453374701086432, + "timestamp": "2025-09-10 02:27:32.855023", + "step": 5529, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:32.886789", + "step": 5529, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020828154811169952, + "timestamp": "2025-09-10 02:27:32.897005", + "step": 5530, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:27:32.932004", + "step": 5530, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003416097315493971, + "timestamp": "2025-09-10 02:27:32.945746", + "step": 5531, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:27:32.978891", + "step": 5531, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005632195970974863, + "timestamp": "2025-09-10 02:27:33.013047", + "step": 5532, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:33.044070", + "step": 5532, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005745100323110819, + "timestamp": "2025-09-10 02:27:33.051515", + "step": 5533, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:33.084358", + "step": 5533, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000916880089789629, + "timestamp": "2025-09-10 02:27:33.088706", + "step": 5534, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:33.118543", + "step": 5534, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013511795259546489, + "timestamp": "2025-09-10 02:27:33.125244", + "step": 5535, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:33.156478", + "step": 5535, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011048159562051296, + "timestamp": "2025-09-10 02:27:33.184951", + "step": 5536, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:27:33.214401", + "step": 5536, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008397336350753903, + "timestamp": "2025-09-10 02:27:33.216071", + "step": 5537, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:33.245476", + "step": 5537, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011787625262513757, + "timestamp": "2025-09-10 02:27:33.249811", + "step": 5538, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:33.281454", + "step": 5538, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012026058975607157, + "timestamp": "2025-09-10 02:27:33.291796", + "step": 5539, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:33.321747", + "step": 5539, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03373594582080841, + "timestamp": "2025-09-10 02:27:33.346776", + "step": 5540, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:33.376340", + "step": 5540, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018737561185844243, + "timestamp": "2025-09-10 02:27:33.380012", + "step": 5541, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:33.414380", + "step": 5541, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.8746190916281193e-05, + "timestamp": "2025-09-10 02:27:33.421294", + "step": 5542, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:33.451445", + "step": 5542, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03155756741762161, + "timestamp": "2025-09-10 02:27:33.463526", + "step": 5543, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:33.494717", + "step": 5543, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006670065922662616, + "timestamp": "2025-09-10 02:27:33.523226", + "step": 5544, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:33.554422", + "step": 5544, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04387173801660538, + "timestamp": "2025-09-10 02:27:33.559135", + "step": 5545, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:33.593139", + "step": 5545, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019743894517887384, + "timestamp": "2025-09-10 02:27:33.605635", + "step": 5546, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:33.639149", + "step": 5546, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.449820648413152e-05, + "timestamp": "2025-09-10 02:27:33.648609", + "step": 5547, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:33.680150", + "step": 5547, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011188429780304432, + "timestamp": "2025-09-10 02:27:33.707910", + "step": 5548, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:33.738430", + "step": 5548, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005150972865521908, + "timestamp": "2025-09-10 02:27:33.743746", + "step": 5549, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:33.778426", + "step": 5549, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003233766183257103, + "timestamp": "2025-09-10 02:27:33.785737", + "step": 5550, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:33.819092", + "step": 5550, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020095381478313357, + "timestamp": "2025-09-10 02:27:33.831112", + "step": 5551, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:27:33.870353", + "step": 5551, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017904266132973135, + "timestamp": "2025-09-10 02:27:33.907089", + "step": 5552, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:33.941168", + "step": 5552, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.078414738411084e-05, + "timestamp": "2025-09-10 02:27:33.943772", + "step": 5553, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:33.976793", + "step": 5553, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003823090228252113, + "timestamp": "2025-09-10 02:27:33.983643", + "step": 5554, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:27:34.025232", + "step": 5554, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005709293182007968, + "timestamp": "2025-09-10 02:27:34.042566", + "step": 5555, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:34.074986", + "step": 5555, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023254666302818805, + "timestamp": "2025-09-10 02:27:34.102934", + "step": 5556, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:27:34.135109", + "step": 5556, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022391592210624367, + "timestamp": "2025-09-10 02:27:34.137096", + "step": 5557, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:34.170741", + "step": 5557, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015340792946517467, + "timestamp": "2025-09-10 02:27:34.178292", + "step": 5558, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:27:34.219562", + "step": 5558, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001337612047791481, + "timestamp": "2025-09-10 02:27:34.235720", + "step": 5559, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:27:34.272146", + "step": 5559, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005941048148088157, + "timestamp": "2025-09-10 02:27:34.306749", + "step": 5560, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:27:34.340343", + "step": 5560, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025993268354795873, + "timestamp": "2025-09-10 02:27:34.353688", + "step": 5561, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:34.385854", + "step": 5561, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000905154156498611, + "timestamp": "2025-09-10 02:27:34.389899", + "step": 5562, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:34.428220", + "step": 5562, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006523271440528333, + "timestamp": "2025-09-10 02:27:34.432679", + "step": 5563, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:34.474553", + "step": 5563, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.019279291853309e-05, + "timestamp": "2025-09-10 02:27:34.506256", + "step": 5564, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:34.536315", + "step": 5564, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.2420513384277e-05, + "timestamp": "2025-09-10 02:27:34.540790", + "step": 5565, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:34.571997", + "step": 5565, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020063482224941254, + "timestamp": "2025-09-10 02:27:34.581502", + "step": 5566, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:27:34.632019", + "step": 5566, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006023825262673199, + "timestamp": "2025-09-10 02:27:34.649384", + "step": 5567, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:27:34.689284", + "step": 5567, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001301374431932345, + "timestamp": "2025-09-10 02:27:34.726061", + "step": 5568, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:34.758512", + "step": 5568, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000419637217419222, + "timestamp": "2025-09-10 02:27:34.762345", + "step": 5569, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:34.793133", + "step": 5569, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030135762062855065, + "timestamp": "2025-09-10 02:27:34.802810", + "step": 5570, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:34.835555", + "step": 5570, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.938400035025552e-05, + "timestamp": "2025-09-10 02:27:34.847463", + "step": 5571, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:34.878126", + "step": 5571, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001446371665224433, + "timestamp": "2025-09-10 02:27:34.905980", + "step": 5572, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:34.937476", + "step": 5572, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001281161530641839, + "timestamp": "2025-09-10 02:27:34.942245", + "step": 5573, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:27:34.976733", + "step": 5573, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013364390179049224, + "timestamp": "2025-09-10 02:27:34.990743", + "step": 5574, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:27:35.024991", + "step": 5574, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001239602715941146, + "timestamp": "2025-09-10 02:27:35.038337", + "step": 5575, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:35.069263", + "step": 5575, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047443489893339574, + "timestamp": "2025-09-10 02:27:35.097251", + "step": 5576, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:35.127534", + "step": 5576, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024012614449020475, + "timestamp": "2025-09-10 02:27:35.130325", + "step": 5577, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:35.160617", + "step": 5577, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003875931433867663, + "timestamp": "2025-09-10 02:27:35.167435", + "step": 5578, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:35.197741", + "step": 5578, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003180662461090833, + "timestamp": "2025-09-10 02:27:35.208661", + "step": 5579, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:35.239848", + "step": 5579, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013746933836955577, + "timestamp": "2025-09-10 02:27:35.267537", + "step": 5580, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:35.298635", + "step": 5580, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005690049845725298, + "timestamp": "2025-09-10 02:27:35.303999", + "step": 5581, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:35.335409", + "step": 5581, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014996285608503968, + "timestamp": "2025-09-10 02:27:35.342161", + "step": 5582, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:35.374179", + "step": 5582, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021600407490041107, + "timestamp": "2025-09-10 02:27:35.380733", + "step": 5583, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:35.411851", + "step": 5583, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007192405755631626, + "timestamp": "2025-09-10 02:27:35.436830", + "step": 5584, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:35.467895", + "step": 5584, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003458319406490773, + "timestamp": "2025-09-10 02:27:35.472981", + "step": 5585, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:27:35.503975", + "step": 5585, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002480298571754247, + "timestamp": "2025-09-10 02:27:35.506323", + "step": 5586, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:27:45.964600", + "step": 5586, + "epoch": 3 + }, + { + "type": "pplx", + "content": 21153895.963864572, + "timestamp": "2025-09-10 02:27:45.968772", + "step": 5586, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:45.999647", + "step": 5586, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005043831770308316, + "timestamp": "2025-09-10 02:27:46.005515", + "step": 5587, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:27:46.044554", + "step": 5587, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019294557569082826, + "timestamp": "2025-09-10 02:27:46.078716", + "step": 5588, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:46.110185", + "step": 5588, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006769891479052603, + "timestamp": "2025-09-10 02:27:46.114567", + "step": 5589, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:46.145830", + "step": 5589, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010750783985713497, + "timestamp": "2025-09-10 02:27:46.153069", + "step": 5590, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:46.184008", + "step": 5590, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029583461582660675, + "timestamp": "2025-09-10 02:27:46.188447", + "step": 5591, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:46.218761", + "step": 5591, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015220470959320664, + "timestamp": "2025-09-10 02:27:46.244206", + "step": 5592, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:46.275648", + "step": 5592, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018189029651694, + "timestamp": "2025-09-10 02:27:46.283544", + "step": 5593, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:46.313685", + "step": 5593, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017844110261648893, + "timestamp": "2025-09-10 02:27:46.320745", + "step": 5594, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:46.352390", + "step": 5594, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002676573349162936, + "timestamp": "2025-09-10 02:27:46.364916", + "step": 5595, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:46.396379", + "step": 5595, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013415786670520902, + "timestamp": "2025-09-10 02:27:46.428107", + "step": 5596, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:46.459638", + "step": 5596, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009746703435666859, + "timestamp": "2025-09-10 02:27:46.467143", + "step": 5597, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:46.498227", + "step": 5597, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008030079188756645, + "timestamp": "2025-09-10 02:27:46.505002", + "step": 5598, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:46.536683", + "step": 5598, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012600510381162167, + "timestamp": "2025-09-10 02:27:46.546818", + "step": 5599, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:46.577027", + "step": 5599, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023556490486953408, + "timestamp": "2025-09-10 02:27:46.605544", + "step": 5600, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 784 + ], + "flops": 23255845310656 + }, + "timestamp": "2025-09-10 02:27:46.668269", + "step": 5600, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002329291310161352, + "timestamp": "2025-09-10 02:27:46.695192", + "step": 5601, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:46.727012", + "step": 5601, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002381420199526474, + "timestamp": "2025-09-10 02:27:46.739574", + "step": 5602, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:46.770291", + "step": 5602, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032722530886530876, + "timestamp": "2025-09-10 02:27:46.774173", + "step": 5603, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:46.805995", + "step": 5603, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00039383722469210625, + "timestamp": "2025-09-10 02:27:46.837715", + "step": 5604, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:46.869559", + "step": 5604, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016721284482628107, + "timestamp": "2025-09-10 02:27:46.877324", + "step": 5605, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:27:46.917602", + "step": 5605, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009820311097428203, + "timestamp": "2025-09-10 02:27:46.933729", + "step": 5606, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:27:46.968616", + "step": 5606, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013964021345600486, + "timestamp": "2025-09-10 02:27:46.971092", + "step": 5607, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:47.001901", + "step": 5607, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012165692896815017, + "timestamp": "2025-09-10 02:27:47.029468", + "step": 5608, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:47.060590", + "step": 5608, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014244894264265895, + "timestamp": "2025-09-10 02:27:47.065971", + "step": 5609, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:47.097599", + "step": 5609, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.5444008972262964e-05, + "timestamp": "2025-09-10 02:27:47.102060", + "step": 5610, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:47.134406", + "step": 5610, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.020029067993164062, + "timestamp": "2025-09-10 02:27:47.141398", + "step": 5611, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:47.173807", + "step": 5611, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018498908320907503, + "timestamp": "2025-09-10 02:27:47.198781", + "step": 5612, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:47.230234", + "step": 5612, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031777346157468855, + "timestamp": "2025-09-10 02:27:47.235598", + "step": 5613, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:47.270740", + "step": 5613, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015662026999052614, + "timestamp": "2025-09-10 02:27:47.278215", + "step": 5614, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:47.309072", + "step": 5614, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004629619943443686, + "timestamp": "2025-09-10 02:27:47.312933", + "step": 5615, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:47.343858", + "step": 5615, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.026808402268216e-05, + "timestamp": "2025-09-10 02:27:47.372453", + "step": 5616, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:47.412614", + "step": 5616, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044115257333032787, + "timestamp": "2025-09-10 02:27:47.417106", + "step": 5617, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:27:47.459530", + "step": 5617, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015083990001585335, + "timestamp": "2025-09-10 02:27:47.475170", + "step": 5618, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:47.515880", + "step": 5618, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014641489542555064, + "timestamp": "2025-09-10 02:27:47.525490", + "step": 5619, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:47.558296", + "step": 5619, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002689136890694499, + "timestamp": "2025-09-10 02:27:47.583300", + "step": 5620, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 14712978242368 + }, + "timestamp": "2025-09-10 02:27:47.632703", + "step": 5620, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016491406131535769, + "timestamp": "2025-09-10 02:27:47.649985", + "step": 5621, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:27:47.686849", + "step": 5621, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010262165596941486, + "timestamp": "2025-09-10 02:27:47.700613", + "step": 5622, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:47.730764", + "step": 5622, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035222251899540424, + "timestamp": "2025-09-10 02:27:47.734893", + "step": 5623, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:47.768301", + "step": 5623, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014401556109078228, + "timestamp": "2025-09-10 02:27:47.796183", + "step": 5624, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:47.829146", + "step": 5624, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.734489867696539e-05, + "timestamp": "2025-09-10 02:27:47.834206", + "step": 5625, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:47.868850", + "step": 5625, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001005322701530531, + "timestamp": "2025-09-10 02:27:47.875667", + "step": 5626, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:47.908700", + "step": 5626, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047445675591006875, + "timestamp": "2025-09-10 02:27:47.912771", + "step": 5627, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:47.946188", + "step": 5627, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01834898255765438, + "timestamp": "2025-09-10 02:27:47.974549", + "step": 5628, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:48.006496", + "step": 5628, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011012358590960503, + "timestamp": "2025-09-10 02:27:48.011573", + "step": 5629, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:48.045571", + "step": 5629, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.657299334416166e-05, + "timestamp": "2025-09-10 02:27:48.049587", + "step": 5630, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:48.081291", + "step": 5630, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.444791117450222e-05, + "timestamp": "2025-09-10 02:27:48.085451", + "step": 5631, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:48.116422", + "step": 5631, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001294314133701846, + "timestamp": "2025-09-10 02:27:48.144885", + "step": 5632, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:48.175511", + "step": 5632, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005845736595802009, + "timestamp": "2025-09-10 02:27:48.181018", + "step": 5633, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:48.212713", + "step": 5633, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001259546697838232, + "timestamp": "2025-09-10 02:27:48.219610", + "step": 5634, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:48.250841", + "step": 5634, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.173291280400008e-05, + "timestamp": "2025-09-10 02:27:48.257649", + "step": 5635, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:48.289711", + "step": 5635, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.338084782939404e-05, + "timestamp": "2025-09-10 02:27:48.323187", + "step": 5636, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:48.354729", + "step": 5636, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014086895622313023, + "timestamp": "2025-09-10 02:27:48.363501", + "step": 5637, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:48.394692", + "step": 5637, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006178818293847144, + "timestamp": "2025-09-10 02:27:48.399112", + "step": 5638, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:48.430466", + "step": 5638, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003365145530551672, + "timestamp": "2025-09-10 02:27:48.440750", + "step": 5639, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:27:48.475898", + "step": 5639, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015990216343197972, + "timestamp": "2025-09-10 02:27:48.510610", + "step": 5640, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:48.541734", + "step": 5640, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005560553981922567, + "timestamp": "2025-09-10 02:27:48.546755", + "step": 5641, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:48.578165", + "step": 5641, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001505583932157606, + "timestamp": "2025-09-10 02:27:48.585776", + "step": 5642, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:48.616419", + "step": 5642, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036215136060491204, + "timestamp": "2025-09-10 02:27:48.626544", + "step": 5643, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:48.658433", + "step": 5643, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019817725114990026, + "timestamp": "2025-09-10 02:27:48.683629", + "step": 5644, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:48.714705", + "step": 5644, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002911986375693232, + "timestamp": "2025-09-10 02:27:48.720004", + "step": 5645, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:48.751853", + "step": 5645, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006733777699992061, + "timestamp": "2025-09-10 02:27:48.762610", + "step": 5646, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:48.793476", + "step": 5646, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010006874072132632, + "timestamp": "2025-09-10 02:27:48.797969", + "step": 5647, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:48.829166", + "step": 5647, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048814056208357215, + "timestamp": "2025-09-10 02:27:48.862325", + "step": 5648, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:48.894618", + "step": 5648, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001697771018370986, + "timestamp": "2025-09-10 02:27:48.899692", + "step": 5649, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:48.930035", + "step": 5649, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001428636023774743, + "timestamp": "2025-09-10 02:27:48.934199", + "step": 5650, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:48.965831", + "step": 5650, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000209279969567433, + "timestamp": "2025-09-10 02:27:48.976845", + "step": 5651, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:49.007514", + "step": 5651, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015839662228245288, + "timestamp": "2025-09-10 02:27:49.032626", + "step": 5652, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:49.063881", + "step": 5652, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.35560601647012e-05, + "timestamp": "2025-09-10 02:27:49.068742", + "step": 5653, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:49.099818", + "step": 5653, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006064533954486251, + "timestamp": "2025-09-10 02:27:49.107264", + "step": 5654, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:27:49.145723", + "step": 5654, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011858725920319557, + "timestamp": "2025-09-10 02:27:49.161628", + "step": 5655, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:49.193563", + "step": 5655, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020078910165466368, + "timestamp": "2025-09-10 02:27:49.222016", + "step": 5656, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:49.258814", + "step": 5656, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003024769830517471, + "timestamp": "2025-09-10 02:27:49.264152", + "step": 5657, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:49.295019", + "step": 5657, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010363813635194674, + "timestamp": "2025-09-10 02:27:49.299630", + "step": 5658, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:49.330561", + "step": 5658, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000979832955636084, + "timestamp": "2025-09-10 02:27:49.337473", + "step": 5659, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:49.368731", + "step": 5659, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00316768535412848, + "timestamp": "2025-09-10 02:27:49.397153", + "step": 5660, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:49.428455", + "step": 5660, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000212435275898315, + "timestamp": "2025-09-10 02:27:49.433700", + "step": 5661, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:49.464662", + "step": 5661, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002012075565289706, + "timestamp": "2025-09-10 02:27:49.471690", + "step": 5662, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:49.503263", + "step": 5662, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011027476284652948, + "timestamp": "2025-09-10 02:27:49.510978", + "step": 5663, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:49.542521", + "step": 5663, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003467754868324846, + "timestamp": "2025-09-10 02:27:49.570507", + "step": 5664, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:49.603015", + "step": 5664, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003236646589357406, + "timestamp": "2025-09-10 02:27:49.608415", + "step": 5665, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:49.640871", + "step": 5665, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015571604308206588, + "timestamp": "2025-09-10 02:27:49.651335", + "step": 5666, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:49.683553", + "step": 5666, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.091597555903718e-05, + "timestamp": "2025-09-10 02:27:49.691003", + "step": 5667, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:49.723344", + "step": 5667, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007872511632740498, + "timestamp": "2025-09-10 02:27:49.754583", + "step": 5668, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:27:49.786086", + "step": 5668, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007030696724541485, + "timestamp": "2025-09-10 02:27:49.798738", + "step": 5669, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:49.831188", + "step": 5669, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006685466505587101, + "timestamp": "2025-09-10 02:27:49.841817", + "step": 5670, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:49.874810", + "step": 5670, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04719764366745949, + "timestamp": "2025-09-10 02:27:49.879227", + "step": 5671, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:49.909984", + "step": 5671, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005829405854456127, + "timestamp": "2025-09-10 02:27:49.935147", + "step": 5672, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:27:49.966937", + "step": 5672, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043701488175429404, + "timestamp": "2025-09-10 02:27:49.969613", + "step": 5673, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:50.000908", + "step": 5673, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024339115770999342, + "timestamp": "2025-09-10 02:27:50.005518", + "step": 5674, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:50.036102", + "step": 5674, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014959640975575894, + "timestamp": "2025-09-10 02:27:50.043834", + "step": 5675, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:27:50.080322", + "step": 5675, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.585565279237926e-05, + "timestamp": "2025-09-10 02:27:50.115216", + "step": 5676, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:50.146329", + "step": 5676, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05050432309508324, + "timestamp": "2025-09-10 02:27:50.156092", + "step": 5677, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:27:50.190885", + "step": 5677, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037958953180350363, + "timestamp": "2025-09-10 02:27:50.204725", + "step": 5678, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:50.235976", + "step": 5678, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040227436693385243, + "timestamp": "2025-09-10 02:27:50.242748", + "step": 5679, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:50.273801", + "step": 5679, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012008142948616296, + "timestamp": "2025-09-10 02:27:50.298810", + "step": 5680, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:50.329195", + "step": 5680, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.119750757236034e-05, + "timestamp": "2025-09-10 02:27:50.333798", + "step": 5681, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:50.365461", + "step": 5681, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011381086660549045, + "timestamp": "2025-09-10 02:27:50.378048", + "step": 5682, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 576 + ], + "flops": 17085996872448 + }, + "timestamp": "2025-09-10 02:27:50.427147", + "step": 5682, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008125408785417676, + "timestamp": "2025-09-10 02:27:50.446582", + "step": 5683, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:50.478234", + "step": 5683, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010114780889125541, + "timestamp": "2025-09-10 02:27:50.509371", + "step": 5684, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:50.540848", + "step": 5684, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005297398311085999, + "timestamp": "2025-09-10 02:27:50.543033", + "step": 5685, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:50.574441", + "step": 5685, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005330296116881073, + "timestamp": "2025-09-10 02:27:50.582221", + "step": 5686, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:27:50.616873", + "step": 5686, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010487495455890894, + "timestamp": "2025-09-10 02:27:50.630586", + "step": 5687, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:27:50.661249", + "step": 5687, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015214362065307796, + "timestamp": "2025-09-10 02:27:50.685189", + "step": 5688, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:50.717537", + "step": 5688, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010265544056892395, + "timestamp": "2025-09-10 02:27:50.719956", + "step": 5689, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:50.750598", + "step": 5689, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020830826833844185, + "timestamp": "2025-09-10 02:27:50.757664", + "step": 5690, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:50.789024", + "step": 5690, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004475609748624265, + "timestamp": "2025-09-10 02:27:50.793326", + "step": 5691, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:27:50.824013", + "step": 5691, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002475904766470194, + "timestamp": "2025-09-10 02:27:50.847535", + "step": 5692, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:50.879538", + "step": 5692, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0027441454585641623, + "timestamp": "2025-09-10 02:27:50.881916", + "step": 5693, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:50.916408", + "step": 5693, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001911252038553357, + "timestamp": "2025-09-10 02:27:50.926786", + "step": 5694, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:50.958900", + "step": 5694, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.5323111155303195e-05, + "timestamp": "2025-09-10 02:27:50.966470", + "step": 5695, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:50.999357", + "step": 5695, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004969104775227606, + "timestamp": "2025-09-10 02:27:51.031237", + "step": 5696, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:51.063553", + "step": 5696, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03493015095591545, + "timestamp": "2025-09-10 02:27:51.065993", + "step": 5697, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:51.097386", + "step": 5697, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.232889376813546e-05, + "timestamp": "2025-09-10 02:27:51.104580", + "step": 5698, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:27:51.135783", + "step": 5698, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008442237740382552, + "timestamp": "2025-09-10 02:27:51.139307", + "step": 5699, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:27:51.170109", + "step": 5699, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006859573069959879, + "timestamp": "2025-09-10 02:27:51.194266", + "step": 5700, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:51.227350", + "step": 5700, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002599300933070481, + "timestamp": "2025-09-10 02:27:51.232459", + "step": 5701, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:27:51.266118", + "step": 5701, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004736356902867556, + "timestamp": "2025-09-10 02:27:51.279485", + "step": 5702, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:51.311764", + "step": 5702, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007690637721680105, + "timestamp": "2025-09-10 02:27:51.316233", + "step": 5703, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:51.347562", + "step": 5703, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0204758383333683, + "timestamp": "2025-09-10 02:27:51.380594", + "step": 5704, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:51.412550", + "step": 5704, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011761164059862494, + "timestamp": "2025-09-10 02:27:51.416928", + "step": 5705, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:51.448539", + "step": 5705, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.793437594547868e-05, + "timestamp": "2025-09-10 02:27:51.455944", + "step": 5706, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:51.487078", + "step": 5706, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.091299969237298e-05, + "timestamp": "2025-09-10 02:27:51.493963", + "step": 5707, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:51.526583", + "step": 5707, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03535769507288933, + "timestamp": "2025-09-10 02:27:51.551702", + "step": 5708, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:51.583111", + "step": 5708, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004072172741871327, + "timestamp": "2025-09-10 02:27:51.588498", + "step": 5709, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:51.619214", + "step": 5709, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003087377699557692, + "timestamp": "2025-09-10 02:27:51.627130", + "step": 5710, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:51.658723", + "step": 5710, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006235843989998102, + "timestamp": "2025-09-10 02:27:51.670864", + "step": 5711, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:51.702195", + "step": 5711, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.547135464847088e-05, + "timestamp": "2025-09-10 02:27:51.730599", + "step": 5712, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:51.762336", + "step": 5712, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005502524436451495, + "timestamp": "2025-09-10 02:27:51.767362", + "step": 5713, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:27:51.798998", + "step": 5713, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001679424021858722, + "timestamp": "2025-09-10 02:27:51.811368", + "step": 5714, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:27:51.842756", + "step": 5714, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006052498356439173, + "timestamp": "2025-09-10 02:27:51.845260", + "step": 5715, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:27:51.877035", + "step": 5715, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018796491203829646, + "timestamp": "2025-09-10 02:27:51.910146", + "step": 5716, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:51.940836", + "step": 5716, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015208103694021702, + "timestamp": "2025-09-10 02:27:51.943246", + "step": 5717, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:51.974973", + "step": 5717, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0038770330138504505, + "timestamp": "2025-09-10 02:27:51.985251", + "step": 5718, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:52.017433", + "step": 5718, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001229040208272636, + "timestamp": "2025-09-10 02:27:52.027604", + "step": 5719, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:52.058545", + "step": 5719, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011433316394686699, + "timestamp": "2025-09-10 02:27:52.086539", + "step": 5720, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:52.117962", + "step": 5720, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003462762397248298, + "timestamp": "2025-09-10 02:27:52.120244", + "step": 5721, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:52.152796", + "step": 5721, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000564678106456995, + "timestamp": "2025-09-10 02:27:52.159875", + "step": 5722, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:27:52.192123", + "step": 5722, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010591919999569654, + "timestamp": "2025-09-10 02:27:52.199658", + "step": 5723, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:27:52.230817", + "step": 5723, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01734619028866291, + "timestamp": "2025-09-10 02:27:52.258566", + "step": 5724, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:27:52.289646", + "step": 5724, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001629464386496693, + "timestamp": "2025-09-10 02:27:52.297564", + "step": 5725, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:27:52.330081", + "step": 5725, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.870922591886483e-05, + "timestamp": "2025-09-10 02:27:52.333926", + "step": 5726, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:52.365369", + "step": 5726, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018190907314419746, + "timestamp": "2025-09-10 02:27:52.369832", + "step": 5727, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:27:52.400840", + "step": 5727, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.9860497306799516e-05, + "timestamp": "2025-09-10 02:27:52.426196", + "step": 5728, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:52.456878", + "step": 5728, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004968225257471204, + "timestamp": "2025-09-10 02:27:52.465537", + "step": 5729, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:27:52.496269", + "step": 5729, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01434040255844593, + "timestamp": "2025-09-10 02:27:52.504074", + "step": 5730, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:27:52.537346", + "step": 5730, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005067458841949701, + "timestamp": "2025-09-10 02:27:52.544273", + "step": 5731, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:27:52.576225", + "step": 5731, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.439605491934344e-05, + "timestamp": "2025-09-10 02:27:52.608122", + "step": 5732, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:27:52.645892", + "step": 5732, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003644505748525262, + "timestamp": "2025-09-10 02:27:52.661313", + "step": 5733, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:28:02.807246", + "step": 5733, + "epoch": 3 + }, + { + "type": "pplx", + "content": 22101991.669623252, + "timestamp": "2025-09-10 02:28:02.810174", + "step": 5733, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:28:02.840101", + "step": 5733, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.033737100660800934, + "timestamp": "2025-09-10 02:28:02.842373", + "step": 5734, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:28:02.876069", + "step": 5734, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036468225880526006, + "timestamp": "2025-09-10 02:28:02.878737", + "step": 5735, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:02.910075", + "step": 5735, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009591284207999706, + "timestamp": "2025-09-10 02:28:02.937888", + "step": 5736, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:02.969710", + "step": 5736, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0036816957872360945, + "timestamp": "2025-09-10 02:28:02.980055", + "step": 5737, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:28:03.016287", + "step": 5737, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011977337999269366, + "timestamp": "2025-09-10 02:28:03.030182", + "step": 5738, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:28:03.069816", + "step": 5738, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003067262005060911, + "timestamp": "2025-09-10 02:28:03.085708", + "step": 5739, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:03.121082", + "step": 5739, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.026271553710103035, + "timestamp": "2025-09-10 02:28:03.155396", + "step": 5740, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:03.189088", + "step": 5740, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015502488240599632, + "timestamp": "2025-09-10 02:28:03.198824", + "step": 5741, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:28:03.235146", + "step": 5741, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022286844614427537, + "timestamp": "2025-09-10 02:28:03.249150", + "step": 5742, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:03.283744", + "step": 5742, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005459203966893256, + "timestamp": "2025-09-10 02:28:03.294604", + "step": 5743, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:03.329955", + "step": 5743, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006617820472456515, + "timestamp": "2025-09-10 02:28:03.360954", + "step": 5744, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:03.393855", + "step": 5744, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014259156887419522, + "timestamp": "2025-09-10 02:28:03.396424", + "step": 5745, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:28:03.434320", + "step": 5745, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004075972363352776, + "timestamp": "2025-09-10 02:28:03.437045", + "step": 5746, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:03.469204", + "step": 5746, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010417302837595344, + "timestamp": "2025-09-10 02:28:03.476114", + "step": 5747, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:28:03.507167", + "step": 5747, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0031860729213804007, + "timestamp": "2025-09-10 02:28:03.531694", + "step": 5748, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:03.562989", + "step": 5748, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.74440452712588e-05, + "timestamp": "2025-09-10 02:28:03.568360", + "step": 5749, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:03.599347", + "step": 5749, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004242011927999556, + "timestamp": "2025-09-10 02:28:03.606428", + "step": 5750, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:03.638907", + "step": 5750, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002124165650457144, + "timestamp": "2025-09-10 02:28:03.651497", + "step": 5751, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:03.685467", + "step": 5751, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002149190113414079, + "timestamp": "2025-09-10 02:28:03.710926", + "step": 5752, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:03.743262", + "step": 5752, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003125338116660714, + "timestamp": "2025-09-10 02:28:03.745774", + "step": 5753, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:03.780095", + "step": 5753, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010561124421656132, + "timestamp": "2025-09-10 02:28:03.791977", + "step": 5754, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:28:03.832603", + "step": 5754, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001346966892015189, + "timestamp": "2025-09-10 02:28:03.846651", + "step": 5755, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:03.878073", + "step": 5755, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008288529934361577, + "timestamp": "2025-09-10 02:28:03.909071", + "step": 5756, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:03.939746", + "step": 5756, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.022131487727165222, + "timestamp": "2025-09-10 02:28:03.944860", + "step": 5757, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:03.977124", + "step": 5757, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015002823784016073, + "timestamp": "2025-09-10 02:28:03.984507", + "step": 5758, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:04.020408", + "step": 5758, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003043616015929729, + "timestamp": "2025-09-10 02:28:04.032525", + "step": 5759, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:28:04.072446", + "step": 5759, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.715518636861816e-05, + "timestamp": "2025-09-10 02:28:04.109495", + "step": 5760, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:04.141618", + "step": 5760, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012244329263921827, + "timestamp": "2025-09-10 02:28:04.146827", + "step": 5761, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:04.178768", + "step": 5761, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004466865211725235, + "timestamp": "2025-09-10 02:28:04.185715", + "step": 5762, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:04.217142", + "step": 5762, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006833565421402454, + "timestamp": "2025-09-10 02:28:04.229486", + "step": 5763, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:04.260629", + "step": 5763, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013124813558533788, + "timestamp": "2025-09-10 02:28:04.289011", + "step": 5764, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:04.321369", + "step": 5764, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013321618316695094, + "timestamp": "2025-09-10 02:28:04.328935", + "step": 5765, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:04.363050", + "step": 5765, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.689150854479522e-05, + "timestamp": "2025-09-10 02:28:04.376445", + "step": 5766, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:04.408901", + "step": 5766, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008448630687780678, + "timestamp": "2025-09-10 02:28:04.412863", + "step": 5767, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:04.447011", + "step": 5767, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.289824134204537e-05, + "timestamp": "2025-09-10 02:28:04.481249", + "step": 5768, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:04.513451", + "step": 5768, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007902790675871074, + "timestamp": "2025-09-10 02:28:04.521242", + "step": 5769, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:04.552772", + "step": 5769, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.018035726621747017, + "timestamp": "2025-09-10 02:28:04.560464", + "step": 5770, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:04.594612", + "step": 5770, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003475056146271527, + "timestamp": "2025-09-10 02:28:04.602399", + "step": 5771, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:04.634735", + "step": 5771, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001296757603995502, + "timestamp": "2025-09-10 02:28:04.663414", + "step": 5772, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:04.695104", + "step": 5772, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012029794743284583, + "timestamp": "2025-09-10 02:28:04.700693", + "step": 5773, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:04.731906", + "step": 5773, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002924522617831826, + "timestamp": "2025-09-10 02:28:04.739804", + "step": 5774, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:04.771603", + "step": 5774, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025918486062437296, + "timestamp": "2025-09-10 02:28:04.776217", + "step": 5775, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:28:04.811165", + "step": 5775, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013173665502108634, + "timestamp": "2025-09-10 02:28:04.845733", + "step": 5776, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:04.877086", + "step": 5776, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01582178846001625, + "timestamp": "2025-09-10 02:28:04.884742", + "step": 5777, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:28:04.927710", + "step": 5777, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009278925135731697, + "timestamp": "2025-09-10 02:28:04.945062", + "step": 5778, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:04.977713", + "step": 5778, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010470326524227858, + "timestamp": "2025-09-10 02:28:04.984773", + "step": 5779, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:05.016401", + "step": 5779, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019801973830908537, + "timestamp": "2025-09-10 02:28:05.043954", + "step": 5780, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:05.074622", + "step": 5780, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018586774822324514, + "timestamp": "2025-09-10 02:28:05.079203", + "step": 5781, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:05.109637", + "step": 5781, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00046877076965756714, + "timestamp": "2025-09-10 02:28:05.119815", + "step": 5782, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:05.150944", + "step": 5782, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047852486022748053, + "timestamp": "2025-09-10 02:28:05.158370", + "step": 5783, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:05.190177", + "step": 5783, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019953006994910538, + "timestamp": "2025-09-10 02:28:05.218090", + "step": 5784, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:05.248152", + "step": 5784, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.028778070583939552, + "timestamp": "2025-09-10 02:28:05.256231", + "step": 5785, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:05.287974", + "step": 5785, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00035870965803042054, + "timestamp": "2025-09-10 02:28:05.295459", + "step": 5786, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:05.327021", + "step": 5786, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00494037102907896, + "timestamp": "2025-09-10 02:28:05.334576", + "step": 5787, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:05.366972", + "step": 5787, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017796893371269107, + "timestamp": "2025-09-10 02:28:05.394417", + "step": 5788, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:05.425926", + "step": 5788, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003134679514914751, + "timestamp": "2025-09-10 02:28:05.430593", + "step": 5789, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:05.461991", + "step": 5789, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004141936369705945, + "timestamp": "2025-09-10 02:28:05.466566", + "step": 5790, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:28:05.498350", + "step": 5790, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000323984568240121, + "timestamp": "2025-09-10 02:28:05.501044", + "step": 5791, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:05.532998", + "step": 5791, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023834407329559326, + "timestamp": "2025-09-10 02:28:05.560963", + "step": 5792, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:05.607438", + "step": 5792, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019796183914877474, + "timestamp": "2025-09-10 02:28:05.612967", + "step": 5793, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:05.643451", + "step": 5793, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002645928878337145, + "timestamp": "2025-09-10 02:28:05.651231", + "step": 5794, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:05.681908", + "step": 5794, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006678312201984227, + "timestamp": "2025-09-10 02:28:05.688973", + "step": 5795, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:28:05.732261", + "step": 5795, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002754285524133593, + "timestamp": "2025-09-10 02:28:05.766953", + "step": 5796, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:05.798145", + "step": 5796, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007207631133496761, + "timestamp": "2025-09-10 02:28:05.803576", + "step": 5797, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:05.835472", + "step": 5797, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006030822987668216, + "timestamp": "2025-09-10 02:28:05.842377", + "step": 5798, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:05.873976", + "step": 5798, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043905325583182275, + "timestamp": "2025-09-10 02:28:05.884375", + "step": 5799, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:05.919505", + "step": 5799, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043995765736326575, + "timestamp": "2025-09-10 02:28:05.947055", + "step": 5800, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:05.979533", + "step": 5800, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005144139868207276, + "timestamp": "2025-09-10 02:28:05.987903", + "step": 5801, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:06.019258", + "step": 5801, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005805970868095756, + "timestamp": "2025-09-10 02:28:06.029217", + "step": 5802, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:06.061900", + "step": 5802, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004789031110703945, + "timestamp": "2025-09-10 02:28:06.069365", + "step": 5803, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:06.100953", + "step": 5803, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047223473666235805, + "timestamp": "2025-09-10 02:28:06.132849", + "step": 5804, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:06.165408", + "step": 5804, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000832175777759403, + "timestamp": "2025-09-10 02:28:06.170104", + "step": 5805, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:06.201542", + "step": 5805, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040307757444679737, + "timestamp": "2025-09-10 02:28:06.212414", + "step": 5806, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:06.243676", + "step": 5806, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005037328810431063, + "timestamp": "2025-09-10 02:28:06.256030", + "step": 5807, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:06.287141", + "step": 5807, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005597766139544547, + "timestamp": "2025-09-10 02:28:06.314819", + "step": 5808, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:06.346542", + "step": 5808, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008857275359332561, + "timestamp": "2025-09-10 02:28:06.354152", + "step": 5809, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:06.387344", + "step": 5809, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009536809520795941, + "timestamp": "2025-09-10 02:28:06.391070", + "step": 5810, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:06.423975", + "step": 5810, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008017036016099155, + "timestamp": "2025-09-10 02:28:06.430461", + "step": 5811, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:06.463001", + "step": 5811, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008804936660453677, + "timestamp": "2025-09-10 02:28:06.496465", + "step": 5812, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:28:06.534358", + "step": 5812, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013134771725162864, + "timestamp": "2025-09-10 02:28:06.549509", + "step": 5813, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:06.581408", + "step": 5813, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001699960557743907, + "timestamp": "2025-09-10 02:28:06.585313", + "step": 5814, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:06.620356", + "step": 5814, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022905482910573483, + "timestamp": "2025-09-10 02:28:06.627965", + "step": 5815, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:06.659439", + "step": 5815, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005473580211400986, + "timestamp": "2025-09-10 02:28:06.684299", + "step": 5816, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:06.717091", + "step": 5816, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022384269686881453, + "timestamp": "2025-09-10 02:28:06.729757", + "step": 5817, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:06.761616", + "step": 5817, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034808197524398565, + "timestamp": "2025-09-10 02:28:06.771789", + "step": 5818, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:06.804176", + "step": 5818, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008100624545477331, + "timestamp": "2025-09-10 02:28:06.808494", + "step": 5819, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:28:06.840180", + "step": 5819, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004766239726450294, + "timestamp": "2025-09-10 02:28:06.865355", + "step": 5820, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:28:06.898933", + "step": 5820, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017594116798136383, + "timestamp": "2025-09-10 02:28:06.911961", + "step": 5821, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:06.948075", + "step": 5821, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.833243191475049e-05, + "timestamp": "2025-09-10 02:28:06.960014", + "step": 5822, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:06.992918", + "step": 5822, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015262920351233333, + "timestamp": "2025-09-10 02:28:06.999660", + "step": 5823, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:07.034959", + "step": 5823, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041047646664083004, + "timestamp": "2025-09-10 02:28:07.063560", + "step": 5824, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:07.096517", + "step": 5824, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002567381889093667, + "timestamp": "2025-09-10 02:28:07.098731", + "step": 5825, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:07.132739", + "step": 5825, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04794417694211006, + "timestamp": "2025-09-10 02:28:07.136836", + "step": 5826, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:07.168906", + "step": 5826, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002849227748811245, + "timestamp": "2025-09-10 02:28:07.175809", + "step": 5827, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:07.206278", + "step": 5827, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000525909592397511, + "timestamp": "2025-09-10 02:28:07.234310", + "step": 5828, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:07.265525", + "step": 5828, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005727821262553334, + "timestamp": "2025-09-10 02:28:07.270526", + "step": 5829, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:07.308371", + "step": 5829, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017900993407238275, + "timestamp": "2025-09-10 02:28:07.317078", + "step": 5830, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:07.355057", + "step": 5830, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022803548199590296, + "timestamp": "2025-09-10 02:28:07.359320", + "step": 5831, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:07.390801", + "step": 5831, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024710877914913, + "timestamp": "2025-09-10 02:28:07.416686", + "step": 5832, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:07.447536", + "step": 5832, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00446285679936409, + "timestamp": "2025-09-10 02:28:07.450094", + "step": 5833, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:07.480424", + "step": 5833, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004049288108944893, + "timestamp": "2025-09-10 02:28:07.488080", + "step": 5834, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:07.524052", + "step": 5834, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038109347224235535, + "timestamp": "2025-09-10 02:28:07.531088", + "step": 5835, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:07.563022", + "step": 5835, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032000825740396976, + "timestamp": "2025-09-10 02:28:07.587896", + "step": 5836, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:07.619266", + "step": 5836, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012009877245873213, + "timestamp": "2025-09-10 02:28:07.624814", + "step": 5837, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:07.655720", + "step": 5837, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006187800318002701, + "timestamp": "2025-09-10 02:28:07.663362", + "step": 5838, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:07.697095", + "step": 5838, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020875423215329647, + "timestamp": "2025-09-10 02:28:07.710498", + "step": 5839, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:07.741626", + "step": 5839, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002056649245787412, + "timestamp": "2025-09-10 02:28:07.769542", + "step": 5840, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:07.800908", + "step": 5840, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004982929094694555, + "timestamp": "2025-09-10 02:28:07.803353", + "step": 5841, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:07.835570", + "step": 5841, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000268876610789448, + "timestamp": "2025-09-10 02:28:07.842998", + "step": 5842, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:07.876751", + "step": 5842, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005106105236336589, + "timestamp": "2025-09-10 02:28:07.890146", + "step": 5843, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:07.924353", + "step": 5843, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.785356137901545e-05, + "timestamp": "2025-09-10 02:28:07.952639", + "step": 5844, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:07.983801", + "step": 5844, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004512739833444357, + "timestamp": "2025-09-10 02:28:07.991443", + "step": 5845, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:08.023478", + "step": 5845, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006680196383967996, + "timestamp": "2025-09-10 02:28:08.027553", + "step": 5846, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:08.058692", + "step": 5846, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003025097248610109, + "timestamp": "2025-09-10 02:28:08.065598", + "step": 5847, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:08.097241", + "step": 5847, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005206712055951357, + "timestamp": "2025-09-10 02:28:08.125006", + "step": 5848, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:08.156546", + "step": 5848, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008312045596539974, + "timestamp": "2025-09-10 02:28:08.161170", + "step": 5849, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:08.192371", + "step": 5849, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003203331143595278, + "timestamp": "2025-09-10 02:28:08.199841", + "step": 5850, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:28:08.238100", + "step": 5850, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007190610049292445, + "timestamp": "2025-09-10 02:28:08.253892", + "step": 5851, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:08.285276", + "step": 5851, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019516788888722658, + "timestamp": "2025-09-10 02:28:08.310723", + "step": 5852, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:28:08.343465", + "step": 5852, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022987746633589268, + "timestamp": "2025-09-10 02:28:08.356560", + "step": 5853, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:28:08.387174", + "step": 5853, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023405192769132555, + "timestamp": "2025-09-10 02:28:08.389924", + "step": 5854, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:08.421570", + "step": 5854, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021774417837150395, + "timestamp": "2025-09-10 02:28:08.429305", + "step": 5855, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:08.461401", + "step": 5855, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024362494878005236, + "timestamp": "2025-09-10 02:28:08.492667", + "step": 5856, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:08.525393", + "step": 5856, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006450935616157949, + "timestamp": "2025-09-10 02:28:08.530034", + "step": 5857, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:08.561832", + "step": 5857, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009278419311158359, + "timestamp": "2025-09-10 02:28:08.569270", + "step": 5858, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:08.602237", + "step": 5858, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007209957693703473, + "timestamp": "2025-09-10 02:28:08.608970", + "step": 5859, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:28:08.639784", + "step": 5859, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.901495387661271e-05, + "timestamp": "2025-09-10 02:28:08.663893", + "step": 5860, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:08.697450", + "step": 5860, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000261887616943568, + "timestamp": "2025-09-10 02:28:08.701869", + "step": 5861, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:08.748593", + "step": 5861, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002333016600459814, + "timestamp": "2025-09-10 02:28:08.753260", + "step": 5862, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:08.785064", + "step": 5862, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048499341937713325, + "timestamp": "2025-09-10 02:28:08.792648", + "step": 5863, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:08.824162", + "step": 5863, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001742523891152814, + "timestamp": "2025-09-10 02:28:08.852884", + "step": 5864, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:08.884034", + "step": 5864, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020447876886464655, + "timestamp": "2025-09-10 02:28:08.889211", + "step": 5865, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:08.922986", + "step": 5865, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0033087453339248896, + "timestamp": "2025-09-10 02:28:08.935260", + "step": 5866, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:08.965827", + "step": 5866, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000428111816290766, + "timestamp": "2025-09-10 02:28:08.972751", + "step": 5867, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:09.004500", + "step": 5867, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031185123953036964, + "timestamp": "2025-09-10 02:28:09.031969", + "step": 5868, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:09.064488", + "step": 5868, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.067446131259203e-05, + "timestamp": "2025-09-10 02:28:09.072320", + "step": 5869, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:09.103897", + "step": 5869, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012084973277524114, + "timestamp": "2025-09-10 02:28:09.111514", + "step": 5870, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:09.148832", + "step": 5870, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005114732775837183, + "timestamp": "2025-09-10 02:28:09.153362", + "step": 5871, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:09.191671", + "step": 5871, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019311138021294028, + "timestamp": "2025-09-10 02:28:09.219678", + "step": 5872, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:09.259004", + "step": 5872, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00104613380972296, + "timestamp": "2025-09-10 02:28:09.264427", + "step": 5873, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:09.300484", + "step": 5873, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.62386228214018e-05, + "timestamp": "2025-09-10 02:28:09.308028", + "step": 5874, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:09.340490", + "step": 5874, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.228465205524117e-05, + "timestamp": "2025-09-10 02:28:09.350633", + "step": 5875, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:09.382056", + "step": 5875, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.762354653095827e-05, + "timestamp": "2025-09-10 02:28:09.410773", + "step": 5876, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:09.441904", + "step": 5876, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005466092843562365, + "timestamp": "2025-09-10 02:28:09.447342", + "step": 5877, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:09.479585", + "step": 5877, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009989795507863164, + "timestamp": "2025-09-10 02:28:09.483748", + "step": 5878, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:09.514404", + "step": 5878, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.646583253517747e-05, + "timestamp": "2025-09-10 02:28:09.521969", + "step": 5879, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:09.552415", + "step": 5879, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011240827152505517, + "timestamp": "2025-09-10 02:28:09.577800", + "step": 5880, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:28:19.889322", + "step": 5880, + "epoch": 3 + }, + { + "type": "pplx", + "content": 23481078.320965376, + "timestamp": "2025-09-10 02:28:19.892251", + "step": 5880, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:19.922083", + "step": 5880, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028165520052425563, + "timestamp": "2025-09-10 02:28:19.926300", + "step": 5881, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:19.957969", + "step": 5881, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006952984258532524, + "timestamp": "2025-09-10 02:28:19.967638", + "step": 5882, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:28:20.007260", + "step": 5882, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002651652612257749, + "timestamp": "2025-09-10 02:28:20.023204", + "step": 5883, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:20.053550", + "step": 5883, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028942085918970406, + "timestamp": "2025-09-10 02:28:20.078433", + "step": 5884, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:20.109579", + "step": 5884, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.7478438975522295e-05, + "timestamp": "2025-09-10 02:28:20.114673", + "step": 5885, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:20.145848", + "step": 5885, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.486764611210674e-05, + "timestamp": "2025-09-10 02:28:20.158042", + "step": 5886, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:20.188926", + "step": 5886, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015322092804126441, + "timestamp": "2025-09-10 02:28:20.199824", + "step": 5887, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:20.231887", + "step": 5887, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002798614732455462, + "timestamp": "2025-09-10 02:28:20.257063", + "step": 5888, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:28:20.293618", + "step": 5888, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011071518063545227, + "timestamp": "2025-09-10 02:28:20.309306", + "step": 5889, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:20.341402", + "step": 5889, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.499214239534922e-05, + "timestamp": "2025-09-10 02:28:20.352272", + "step": 5890, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:20.383929", + "step": 5890, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014012886094860733, + "timestamp": "2025-09-10 02:28:20.396510", + "step": 5891, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:20.427689", + "step": 5891, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002186378842452541, + "timestamp": "2025-09-10 02:28:20.453131", + "step": 5892, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:20.485108", + "step": 5892, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012612577120307833, + "timestamp": "2025-09-10 02:28:20.490130", + "step": 5893, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:28:20.521235", + "step": 5893, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001257530675502494, + "timestamp": "2025-09-10 02:28:20.523900", + "step": 5894, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:20.557348", + "step": 5894, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.036817209562287e-05, + "timestamp": "2025-09-10 02:28:20.561710", + "step": 5895, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 15187581968384 + }, + "timestamp": "2025-09-10 02:28:20.608903", + "step": 5895, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.024240778759121895, + "timestamp": "2025-09-10 02:28:20.647471", + "step": 5896, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:20.684847", + "step": 5896, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.944705430418253e-05, + "timestamp": "2025-09-10 02:28:20.693630", + "step": 5897, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:20.732919", + "step": 5897, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011858268408104777, + "timestamp": "2025-09-10 02:28:20.736885", + "step": 5898, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:20.770947", + "step": 5898, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002331020077690482, + "timestamp": "2025-09-10 02:28:20.777874", + "step": 5899, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:20.813776", + "step": 5899, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038511460297740996, + "timestamp": "2025-09-10 02:28:20.841646", + "step": 5900, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:20.878522", + "step": 5900, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011608708882704377, + "timestamp": "2025-09-10 02:28:20.886729", + "step": 5901, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:28:20.917807", + "step": 5901, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002312576165422797, + "timestamp": "2025-09-10 02:28:20.920438", + "step": 5902, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:20.958793", + "step": 5902, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002418461488559842, + "timestamp": "2025-09-10 02:28:20.965545", + "step": 5903, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:20.997621", + "step": 5903, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005505615263246, + "timestamp": "2025-09-10 02:28:21.025586", + "step": 5904, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:21.058667", + "step": 5904, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045085299643687904, + "timestamp": "2025-09-10 02:28:21.068386", + "step": 5905, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:21.104652", + "step": 5905, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.284094706643373e-05, + "timestamp": "2025-09-10 02:28:21.111493", + "step": 5906, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:21.145812", + "step": 5906, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002078805264318362, + "timestamp": "2025-09-10 02:28:21.159192", + "step": 5907, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:21.199027", + "step": 5907, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03610233590006828, + "timestamp": "2025-09-10 02:28:21.227763", + "step": 5908, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 624 + ], + "flops": 18509808050496 + }, + "timestamp": "2025-09-10 02:28:21.276310", + "step": 5908, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009849478956311941, + "timestamp": "2025-09-10 02:28:21.298098", + "step": 5909, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:21.334078", + "step": 5909, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027651750133372843, + "timestamp": "2025-09-10 02:28:21.344901", + "step": 5910, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:21.378005", + "step": 5910, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013139198999851942, + "timestamp": "2025-09-10 02:28:21.385789", + "step": 5911, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:28:21.446447", + "step": 5911, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023808155674487352, + "timestamp": "2025-09-10 02:28:21.482942", + "step": 5912, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:21.530354", + "step": 5912, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000658250879496336, + "timestamp": "2025-09-10 02:28:21.536351", + "step": 5913, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:21.568875", + "step": 5913, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00039495486998930573, + "timestamp": "2025-09-10 02:28:21.573402", + "step": 5914, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:21.620099", + "step": 5914, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012653195299208164, + "timestamp": "2025-09-10 02:28:21.633469", + "step": 5915, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:28:21.685057", + "step": 5915, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0027082362212240696, + "timestamp": "2025-09-10 02:28:21.723071", + "step": 5916, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:28:21.773382", + "step": 5916, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002292887365911156, + "timestamp": "2025-09-10 02:28:21.786732", + "step": 5917, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:21.829150", + "step": 5917, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.886863750172779e-05, + "timestamp": "2025-09-10 02:28:21.836156", + "step": 5918, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:21.874513", + "step": 5918, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01980876363813877, + "timestamp": "2025-09-10 02:28:21.886855", + "step": 5919, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:21.922524", + "step": 5919, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007908547413535416, + "timestamp": "2025-09-10 02:28:21.954458", + "step": 5920, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:21.990423", + "step": 5920, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044035873725079, + "timestamp": "2025-09-10 02:28:21.998468", + "step": 5921, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:22.031255", + "step": 5921, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018272080342285335, + "timestamp": "2025-09-10 02:28:22.043192", + "step": 5922, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:22.084988", + "step": 5922, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02625429444015026, + "timestamp": "2025-09-10 02:28:22.089699", + "step": 5923, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:22.136258", + "step": 5923, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017044544802047312, + "timestamp": "2025-09-10 02:28:22.164393", + "step": 5924, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:22.200917", + "step": 5924, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031413830583915114, + "timestamp": "2025-09-10 02:28:22.208548", + "step": 5925, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:22.245116", + "step": 5925, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015044523170217872, + "timestamp": "2025-09-10 02:28:22.251901", + "step": 5926, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:22.284117", + "step": 5926, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.876120409695432e-05, + "timestamp": "2025-09-10 02:28:22.291415", + "step": 5927, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:22.322691", + "step": 5927, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020055093336850405, + "timestamp": "2025-09-10 02:28:22.350437", + "step": 5928, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:22.386625", + "step": 5928, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002611653646454215, + "timestamp": "2025-09-10 02:28:22.394203", + "step": 5929, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:22.427632", + "step": 5929, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.566867083776742e-05, + "timestamp": "2025-09-10 02:28:22.437509", + "step": 5930, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:22.469507", + "step": 5930, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027315152692608535, + "timestamp": "2025-09-10 02:28:22.473157", + "step": 5931, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:22.506743", + "step": 5931, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031735419179312885, + "timestamp": "2025-09-10 02:28:22.536072", + "step": 5932, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:22.569249", + "step": 5932, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007971972227096558, + "timestamp": "2025-09-10 02:28:22.574548", + "step": 5933, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:22.609284", + "step": 5933, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020833786111325026, + "timestamp": "2025-09-10 02:28:22.616835", + "step": 5934, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:28:22.650154", + "step": 5934, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014346832409501076, + "timestamp": "2025-09-10 02:28:22.652891", + "step": 5935, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:22.684922", + "step": 5935, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011734214058378711, + "timestamp": "2025-09-10 02:28:22.709802", + "step": 5936, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:22.742162", + "step": 5936, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005824709078297019, + "timestamp": "2025-09-10 02:28:22.747280", + "step": 5937, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:22.784761", + "step": 5937, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002061406703433022, + "timestamp": "2025-09-10 02:28:22.797027", + "step": 5938, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:22.834515", + "step": 5938, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000224357980187051, + "timestamp": "2025-09-10 02:28:22.838521", + "step": 5939, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:22.874457", + "step": 5939, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007039483753032982, + "timestamp": "2025-09-10 02:28:22.907412", + "step": 5940, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:22.953965", + "step": 5940, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016574481560382992, + "timestamp": "2025-09-10 02:28:22.958355", + "step": 5941, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:22.990183", + "step": 5941, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001796074939193204, + "timestamp": "2025-09-10 02:28:22.994743", + "step": 5942, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:23.028030", + "step": 5942, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036119503783993423, + "timestamp": "2025-09-10 02:28:23.037850", + "step": 5943, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:23.072595", + "step": 5943, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.575783769832924e-05, + "timestamp": "2025-09-10 02:28:23.100766", + "step": 5944, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:23.132544", + "step": 5944, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012964828056283295, + "timestamp": "2025-09-10 02:28:23.137289", + "step": 5945, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:23.172581", + "step": 5945, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028890607063658535, + "timestamp": "2025-09-10 02:28:23.184799", + "step": 5946, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:23.219483", + "step": 5946, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002000013628276065, + "timestamp": "2025-09-10 02:28:23.226663", + "step": 5947, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:23.264835", + "step": 5947, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011124643497169018, + "timestamp": "2025-09-10 02:28:23.289841", + "step": 5948, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:23.320747", + "step": 5948, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000164168028277345, + "timestamp": "2025-09-10 02:28:23.325561", + "step": 5949, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:23.357978", + "step": 5949, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011684057244565338, + "timestamp": "2025-09-10 02:28:23.365046", + "step": 5950, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:23.395277", + "step": 5950, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.600551129551604e-05, + "timestamp": "2025-09-10 02:28:23.405621", + "step": 5951, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:23.436969", + "step": 5951, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005512385396286845, + "timestamp": "2025-09-10 02:28:23.470138", + "step": 5952, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:23.502824", + "step": 5952, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.65290460549295e-05, + "timestamp": "2025-09-10 02:28:23.511503", + "step": 5953, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:23.559888", + "step": 5953, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011048233864130452, + "timestamp": "2025-09-10 02:28:23.566592", + "step": 5954, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:23.604284", + "step": 5954, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010653473873389885, + "timestamp": "2025-09-10 02:28:23.612019", + "step": 5955, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:23.652712", + "step": 5955, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.3004063627449796e-05, + "timestamp": "2025-09-10 02:28:23.680551", + "step": 5956, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:23.711182", + "step": 5956, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041585671715438366, + "timestamp": "2025-09-10 02:28:23.715771", + "step": 5957, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:23.747079", + "step": 5957, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007976609631441534, + "timestamp": "2025-09-10 02:28:23.754701", + "step": 5958, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:23.796867", + "step": 5958, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035458316560834646, + "timestamp": "2025-09-10 02:28:23.803781", + "step": 5959, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:23.835245", + "step": 5959, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028361781733110547, + "timestamp": "2025-09-10 02:28:23.863166", + "step": 5960, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:23.894704", + "step": 5960, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024366001889575273, + "timestamp": "2025-09-10 02:28:23.899409", + "step": 5961, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:23.930501", + "step": 5961, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015509971126448363, + "timestamp": "2025-09-10 02:28:23.938150", + "step": 5962, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:23.975810", + "step": 5962, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010104191023856401, + "timestamp": "2025-09-10 02:28:23.979823", + "step": 5963, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:28:24.029868", + "step": 5963, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019829573284368962, + "timestamp": "2025-09-10 02:28:24.066464", + "step": 5964, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:24.105446", + "step": 5964, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.516144200460985e-05, + "timestamp": "2025-09-10 02:28:24.112665", + "step": 5965, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:24.144653", + "step": 5965, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004203191492706537, + "timestamp": "2025-09-10 02:28:24.151383", + "step": 5966, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:24.185517", + "step": 5966, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.013540414161980152, + "timestamp": "2025-09-10 02:28:24.197692", + "step": 5967, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:24.235833", + "step": 5967, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008812797605060041, + "timestamp": "2025-09-10 02:28:24.267088", + "step": 5968, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:24.300779", + "step": 5968, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030735571635887027, + "timestamp": "2025-09-10 02:28:24.322539", + "step": 5969, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:24.361963", + "step": 5969, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.28549639461562e-05, + "timestamp": "2025-09-10 02:28:24.368722", + "step": 5970, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:24.402502", + "step": 5970, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002149459905922413, + "timestamp": "2025-09-10 02:28:24.410284", + "step": 5971, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:24.445827", + "step": 5971, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018557047005742788, + "timestamp": "2025-09-10 02:28:24.472269", + "step": 5972, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:24.507841", + "step": 5972, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019703614816535264, + "timestamp": "2025-09-10 02:28:24.510013", + "step": 5973, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:24.550589", + "step": 5973, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000475127570098266, + "timestamp": "2025-09-10 02:28:24.557956", + "step": 5974, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:24.592600", + "step": 5974, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028725885204039514, + "timestamp": "2025-09-10 02:28:24.605998", + "step": 5975, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:28:24.645424", + "step": 5975, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018704243702813983, + "timestamp": "2025-09-10 02:28:24.682214", + "step": 5976, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:24.713623", + "step": 5976, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017203286988660693, + "timestamp": "2025-09-10 02:28:24.721879", + "step": 5977, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:24.755433", + "step": 5977, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017131041386164725, + "timestamp": "2025-09-10 02:28:24.762523", + "step": 5978, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:24.800064", + "step": 5978, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002911267220042646, + "timestamp": "2025-09-10 02:28:24.806746", + "step": 5979, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:24.839679", + "step": 5979, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001717909937724471, + "timestamp": "2025-09-10 02:28:24.871236", + "step": 5980, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:24.904187", + "step": 5980, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016485284140799195, + "timestamp": "2025-09-10 02:28:24.914518", + "step": 5981, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:24.945807", + "step": 5981, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001138357212767005, + "timestamp": "2025-09-10 02:28:24.956643", + "step": 5982, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:24.997444", + "step": 5982, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005265086074359715, + "timestamp": "2025-09-10 02:28:25.009992", + "step": 5983, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:25.045539", + "step": 5983, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021492692176252604, + "timestamp": "2025-09-10 02:28:25.073142", + "step": 5984, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:25.111871", + "step": 5984, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007208751630969346, + "timestamp": "2025-09-10 02:28:25.120136", + "step": 5985, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 15187581968384 + }, + "timestamp": "2025-09-10 02:28:25.163502", + "step": 5985, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015368768945336342, + "timestamp": "2025-09-10 02:28:25.181153", + "step": 5986, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:28:25.219981", + "step": 5986, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.340054824249819e-05, + "timestamp": "2025-09-10 02:28:25.235639", + "step": 5987, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:25.269918", + "step": 5987, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003260863886680454, + "timestamp": "2025-09-10 02:28:25.294827", + "step": 5988, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:25.332469", + "step": 5988, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010763067984953523, + "timestamp": "2025-09-10 02:28:25.336705", + "step": 5989, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:25.368246", + "step": 5989, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016242521815001965, + "timestamp": "2025-09-10 02:28:25.372341", + "step": 5990, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:25.412530", + "step": 5990, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010946859401883557, + "timestamp": "2025-09-10 02:28:25.416923", + "step": 5991, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:25.457416", + "step": 5991, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001512065064162016, + "timestamp": "2025-09-10 02:28:25.488859", + "step": 5992, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:25.526786", + "step": 5992, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014350096171256155, + "timestamp": "2025-09-10 02:28:25.531078", + "step": 5993, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:25.566643", + "step": 5993, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001927161356434226, + "timestamp": "2025-09-10 02:28:25.570732", + "step": 5994, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:28:25.602996", + "step": 5994, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001801040634745732, + "timestamp": "2025-09-10 02:28:25.606374", + "step": 5995, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:25.645401", + "step": 5995, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004571221652440727, + "timestamp": "2025-09-10 02:28:25.675773", + "step": 5996, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:28:25.711063", + "step": 5996, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004833031562156975, + "timestamp": "2025-09-10 02:28:25.724210", + "step": 5997, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:25.758522", + "step": 5997, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.242072362918407e-05, + "timestamp": "2025-09-10 02:28:25.765098", + "step": 5998, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:25.804980", + "step": 5998, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008570431964471936, + "timestamp": "2025-09-10 02:28:25.811600", + "step": 5999, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:25.844995", + "step": 5999, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004085947584826499, + "timestamp": "2025-09-10 02:28:25.877238", + "step": 6000, + "epoch": 3 + }, + { + "type": "info", + "content": "Checkpoint saved at step 6000", + "timestamp": "2025-09-10 02:28:30.932741", + "step": 6000, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:30.970145", + "step": 6000, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.179119322448969e-05, + "timestamp": "2025-09-10 02:28:30.977508", + "step": 6001, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:31.010049", + "step": 6001, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.05771319940686226, + "timestamp": "2025-09-10 02:28:31.022181", + "step": 6002, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:31.064762", + "step": 6002, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022734318918082863, + "timestamp": "2025-09-10 02:28:31.077998", + "step": 6003, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:31.112251", + "step": 6003, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017205321637447923, + "timestamp": "2025-09-10 02:28:31.139626", + "step": 6004, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:31.173783", + "step": 6004, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.907536150421947e-05, + "timestamp": "2025-09-10 02:28:31.181115", + "step": 6005, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:31.225328", + "step": 6005, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015806824376340955, + "timestamp": "2025-09-10 02:28:31.228995", + "step": 6006, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:31.263698", + "step": 6006, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005198474042117596, + "timestamp": "2025-09-10 02:28:31.273239", + "step": 6007, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:31.306749", + "step": 6007, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007690453785471618, + "timestamp": "2025-09-10 02:28:31.337819", + "step": 6008, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:28:31.378571", + "step": 6008, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019263003196101636, + "timestamp": "2025-09-10 02:28:31.382186", + "step": 6009, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:31.431013", + "step": 6009, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.044559430330991745, + "timestamp": "2025-09-10 02:28:31.438377", + "step": 6010, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:28:31.488260", + "step": 6010, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024100964947137982, + "timestamp": "2025-09-10 02:28:31.502012", + "step": 6011, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:31.535576", + "step": 6011, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010909455158980563, + "timestamp": "2025-09-10 02:28:31.566772", + "step": 6012, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:31.603718", + "step": 6012, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001878739712992683, + "timestamp": "2025-09-10 02:28:31.606235", + "step": 6013, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:31.640132", + "step": 6013, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019220814283471555, + "timestamp": "2025-09-10 02:28:31.643888", + "step": 6014, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:31.677008", + "step": 6014, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022207196161616594, + "timestamp": "2025-09-10 02:28:31.688506", + "step": 6015, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:28:31.747631", + "step": 6015, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015352964401245117, + "timestamp": "2025-09-10 02:28:31.785608", + "step": 6016, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:31.829619", + "step": 6016, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025681753177195787, + "timestamp": "2025-09-10 02:28:31.834054", + "step": 6017, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:31.866562", + "step": 6017, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.73608369147405e-05, + "timestamp": "2025-09-10 02:28:31.873272", + "step": 6018, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:28:31.913411", + "step": 6018, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003728985320776701, + "timestamp": "2025-09-10 02:28:31.929287", + "step": 6019, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:28:31.967247", + "step": 6019, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013485472300089896, + "timestamp": "2025-09-10 02:28:31.991264", + "step": 6020, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:32.024606", + "step": 6020, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008274485589936376, + "timestamp": "2025-09-10 02:28:32.031550", + "step": 6021, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:32.066679", + "step": 6021, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025299013941548765, + "timestamp": "2025-09-10 02:28:32.080063", + "step": 6022, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:32.113274", + "step": 6022, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001833633432397619, + "timestamp": "2025-09-10 02:28:32.120704", + "step": 6023, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:32.160564", + "step": 6023, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.197562081273645e-05, + "timestamp": "2025-09-10 02:28:32.186162", + "step": 6024, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:32.219417", + "step": 6024, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024802552070468664, + "timestamp": "2025-09-10 02:28:32.224460", + "step": 6025, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:32.281475", + "step": 6025, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000184178032213822, + "timestamp": "2025-09-10 02:28:32.291170", + "step": 6026, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:32.323527", + "step": 6026, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015562635846436024, + "timestamp": "2025-09-10 02:28:32.334078", + "step": 6027, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:28:42.928241", + "step": 6027, + "epoch": 3 + }, + { + "type": "pplx", + "content": 24656336.660595033, + "timestamp": "2025-09-10 02:28:42.931831", + "step": 6027, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:42.964536", + "step": 6027, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004599474195856601, + "timestamp": "2025-09-10 02:28:42.996271", + "step": 6028, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:43.041688", + "step": 6028, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006434383685700595, + "timestamp": "2025-09-10 02:28:43.052048", + "step": 6029, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:43.099411", + "step": 6029, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006290274322964251, + "timestamp": "2025-09-10 02:28:43.109959", + "step": 6030, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:28:43.149728", + "step": 6030, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011963268043473363, + "timestamp": "2025-09-10 02:28:43.163589", + "step": 6031, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:28:43.211356", + "step": 6031, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023444702674169093, + "timestamp": "2025-09-10 02:28:43.248438", + "step": 6032, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:43.285507", + "step": 6032, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004736782575491816, + "timestamp": "2025-09-10 02:28:43.290490", + "step": 6033, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:43.330232", + "step": 6033, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014104725560173392, + "timestamp": "2025-09-10 02:28:43.342832", + "step": 6034, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:43.376100", + "step": 6034, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014761857688426971, + "timestamp": "2025-09-10 02:28:43.386327", + "step": 6035, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:43.427628", + "step": 6035, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003512998518999666, + "timestamp": "2025-09-10 02:28:43.457347", + "step": 6036, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:43.505904", + "step": 6036, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010449119145050645, + "timestamp": "2025-09-10 02:28:43.516285", + "step": 6037, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:43.558636", + "step": 6037, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028321417630650103, + "timestamp": "2025-09-10 02:28:43.572055", + "step": 6038, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:43.605389", + "step": 6038, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004012010060250759, + "timestamp": "2025-09-10 02:28:43.617264", + "step": 6039, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:43.652786", + "step": 6039, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009143882198259234, + "timestamp": "2025-09-10 02:28:43.684521", + "step": 6040, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:43.717671", + "step": 6040, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016910507110878825, + "timestamp": "2025-09-10 02:28:43.730404", + "step": 6041, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:43.765385", + "step": 6041, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007233879994601011, + "timestamp": "2025-09-10 02:28:43.772972", + "step": 6042, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:43.805267", + "step": 6042, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003337309753987938, + "timestamp": "2025-09-10 02:28:43.812135", + "step": 6043, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:43.857425", + "step": 6043, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001774253905750811, + "timestamp": "2025-09-10 02:28:43.890870", + "step": 6044, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:43.926016", + "step": 6044, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009199073538184166, + "timestamp": "2025-09-10 02:28:43.938684", + "step": 6045, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:43.973721", + "step": 6045, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048229689127765596, + "timestamp": "2025-09-10 02:28:43.978131", + "step": 6046, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:44.010245", + "step": 6046, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005172424134798348, + "timestamp": "2025-09-10 02:28:44.022269", + "step": 6047, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:44.056102", + "step": 6047, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009047497995197773, + "timestamp": "2025-09-10 02:28:44.088040", + "step": 6048, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:28:44.140827", + "step": 6048, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007229651673696935, + "timestamp": "2025-09-10 02:28:44.153855", + "step": 6049, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:44.205343", + "step": 6049, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003836154646705836, + "timestamp": "2025-09-10 02:28:44.218696", + "step": 6050, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:44.257047", + "step": 6050, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00197161384858191, + "timestamp": "2025-09-10 02:28:44.264179", + "step": 6051, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:44.300441", + "step": 6051, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01630636677145958, + "timestamp": "2025-09-10 02:28:44.328369", + "step": 6052, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:44.364877", + "step": 6052, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.930591916898265e-05, + "timestamp": "2025-09-10 02:28:44.369520", + "step": 6053, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:44.409417", + "step": 6053, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038816872984170914, + "timestamp": "2025-09-10 02:28:44.421684", + "step": 6054, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:44.456166", + "step": 6054, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002050340553978458, + "timestamp": "2025-09-10 02:28:44.466586", + "step": 6055, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:44.515033", + "step": 6055, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029212163644842803, + "timestamp": "2025-09-10 02:28:44.546306", + "step": 6056, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:44.591369", + "step": 6056, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010645970905898139, + "timestamp": "2025-09-10 02:28:44.597363", + "step": 6057, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:44.633439", + "step": 6057, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017570939380675554, + "timestamp": "2025-09-10 02:28:44.645838", + "step": 6058, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:44.682235", + "step": 6058, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048702204367145896, + "timestamp": "2025-09-10 02:28:44.686480", + "step": 6059, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:44.718445", + "step": 6059, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012442604929674417, + "timestamp": "2025-09-10 02:28:44.750222", + "step": 6060, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:44.785080", + "step": 6060, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001539927179692313, + "timestamp": "2025-09-10 02:28:44.789559", + "step": 6061, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:44.832176", + "step": 6061, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001589708379469812, + "timestamp": "2025-09-10 02:28:44.845571", + "step": 6062, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:44.898514", + "step": 6062, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.440674719167873e-05, + "timestamp": "2025-09-10 02:28:44.908974", + "step": 6063, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:44.945327", + "step": 6063, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.022429468110203743, + "timestamp": "2025-09-10 02:28:44.973142", + "step": 6064, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:45.007027", + "step": 6064, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002527502947486937, + "timestamp": "2025-09-10 02:28:45.017259", + "step": 6065, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:28:45.056236", + "step": 6065, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001043917378410697, + "timestamp": "2025-09-10 02:28:45.070101", + "step": 6066, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:45.102885", + "step": 6066, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024156781728379428, + "timestamp": "2025-09-10 02:28:45.115456", + "step": 6067, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:45.148757", + "step": 6067, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.486958540743217e-05, + "timestamp": "2025-09-10 02:28:45.179988", + "step": 6068, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:28:45.214966", + "step": 6068, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023520128161180764, + "timestamp": "2025-09-10 02:28:45.219084", + "step": 6069, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:45.255633", + "step": 6069, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004728248168248683, + "timestamp": "2025-09-10 02:28:45.262485", + "step": 6070, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:28:45.306098", + "step": 6070, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002277484891237691, + "timestamp": "2025-09-10 02:28:45.319908", + "step": 6071, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:45.354726", + "step": 6071, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.23619819432497e-05, + "timestamp": "2025-09-10 02:28:45.385846", + "step": 6072, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:45.424762", + "step": 6072, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031708512688055634, + "timestamp": "2025-09-10 02:28:45.435251", + "step": 6073, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:45.468626", + "step": 6073, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004512048908509314, + "timestamp": "2025-09-10 02:28:45.475803", + "step": 6074, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:45.510771", + "step": 6074, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019133243768010288, + "timestamp": "2025-09-10 02:28:45.520955", + "step": 6075, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:45.563511", + "step": 6075, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029035231564193964, + "timestamp": "2025-09-10 02:28:45.596965", + "step": 6076, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:28:45.630230", + "step": 6076, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011966234305873513, + "timestamp": "2025-09-10 02:28:45.643233", + "step": 6077, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:45.678334", + "step": 6077, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.068596561206505e-05, + "timestamp": "2025-09-10 02:28:45.688687", + "step": 6078, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:28:45.727400", + "step": 6078, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018004176672548056, + "timestamp": "2025-09-10 02:28:45.741205", + "step": 6079, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:45.792778", + "step": 6079, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031091499840840697, + "timestamp": "2025-09-10 02:28:45.821496", + "step": 6080, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:45.855903", + "step": 6080, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006474101450294256, + "timestamp": "2025-09-10 02:28:45.863049", + "step": 6081, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:45.900461", + "step": 6081, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032853742595762014, + "timestamp": "2025-09-10 02:28:45.910994", + "step": 6082, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:45.947133", + "step": 6082, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.7726305385585874e-05, + "timestamp": "2025-09-10 02:28:45.954240", + "step": 6083, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:45.992236", + "step": 6083, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022245707805268466, + "timestamp": "2025-09-10 02:28:46.026454", + "step": 6084, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:46.060512", + "step": 6084, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000945181876886636, + "timestamp": "2025-09-10 02:28:46.068454", + "step": 6085, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:28:46.103091", + "step": 6085, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004430218134075403, + "timestamp": "2025-09-10 02:28:46.116492", + "step": 6086, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:46.151682", + "step": 6086, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.371793329482898e-05, + "timestamp": "2025-09-10 02:28:46.156048", + "step": 6087, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:46.191776", + "step": 6087, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032512666075490415, + "timestamp": "2025-09-10 02:28:46.220288", + "step": 6088, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:46.253403", + "step": 6088, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032571834162808955, + "timestamp": "2025-09-10 02:28:46.258905", + "step": 6089, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:46.294231", + "step": 6089, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016125816851854324, + "timestamp": "2025-09-10 02:28:46.305045", + "step": 6090, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:28:46.336814", + "step": 6090, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.697331365197897e-05, + "timestamp": "2025-09-10 02:28:46.339693", + "step": 6091, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:46.378084", + "step": 6091, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005751307471655309, + "timestamp": "2025-09-10 02:28:46.409404", + "step": 6092, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:28:46.450036", + "step": 6092, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.076838376931846e-05, + "timestamp": "2025-09-10 02:28:46.456193", + "step": 6093, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 624 + ], + "flops": 18509808050496 + }, + "timestamp": "2025-09-10 02:28:46.508832", + "step": 6093, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009186511742882431, + "timestamp": "2025-09-10 02:28:46.530588", + "step": 6094, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:46.562802", + "step": 6094, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006315763108432293, + "timestamp": "2025-09-10 02:28:46.569897", + "step": 6095, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:46.600861", + "step": 6095, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013419199967756867, + "timestamp": "2025-09-10 02:28:46.625650", + "step": 6096, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:28:46.665746", + "step": 6096, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002742501674219966, + "timestamp": "2025-09-10 02:28:46.682715", + "step": 6097, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:46.720741", + "step": 6097, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0033741388469934464, + "timestamp": "2025-09-10 02:28:46.733329", + "step": 6098, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:46.764850", + "step": 6098, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.133829522994347e-05, + "timestamp": "2025-09-10 02:28:46.771679", + "step": 6099, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:46.811405", + "step": 6099, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.3357093090889975e-05, + "timestamp": "2025-09-10 02:28:46.835866", + "step": 6100, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:46.875977", + "step": 6100, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00026234795222990215, + "timestamp": "2025-09-10 02:28:46.884317", + "step": 6101, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:46.923251", + "step": 6101, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012434854870662093, + "timestamp": "2025-09-10 02:28:46.927769", + "step": 6102, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:46.960500", + "step": 6102, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.021313535049557686, + "timestamp": "2025-09-10 02:28:46.967469", + "step": 6103, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:47.000997", + "step": 6103, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003981561399996281, + "timestamp": "2025-09-10 02:28:47.028737", + "step": 6104, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:47.065770", + "step": 6104, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014674547128379345, + "timestamp": "2025-09-10 02:28:47.074269", + "step": 6105, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:28:47.109901", + "step": 6105, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017406666884198785, + "timestamp": "2025-09-10 02:28:47.112275", + "step": 6106, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:47.142972", + "step": 6106, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001538008509669453, + "timestamp": "2025-09-10 02:28:47.150025", + "step": 6107, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:47.184350", + "step": 6107, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009951989632099867, + "timestamp": "2025-09-10 02:28:47.217782", + "step": 6108, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:28:47.257856", + "step": 6108, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021144095808267593, + "timestamp": "2025-09-10 02:28:47.273336", + "step": 6109, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:47.307354", + "step": 6109, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008319019107148051, + "timestamp": "2025-09-10 02:28:47.317527", + "step": 6110, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:47.353515", + "step": 6110, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018621633353177458, + "timestamp": "2025-09-10 02:28:47.358189", + "step": 6111, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:47.398595", + "step": 6111, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03902193531394005, + "timestamp": "2025-09-10 02:28:47.426998", + "step": 6112, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:47.458338", + "step": 6112, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.721451816498302e-05, + "timestamp": "2025-09-10 02:28:47.460447", + "step": 6113, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:28:47.491411", + "step": 6113, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000635522126685828, + "timestamp": "2025-09-10 02:28:47.497979", + "step": 6114, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:47.532982", + "step": 6114, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002452080079820007, + "timestamp": "2025-09-10 02:28:47.540095", + "step": 6115, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:47.574851", + "step": 6115, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011523573193699121, + "timestamp": "2025-09-10 02:28:47.603452", + "step": 6116, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:47.650342", + "step": 6116, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015113291738089174, + "timestamp": "2025-09-10 02:28:47.658203", + "step": 6117, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:47.701167", + "step": 6117, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004939243663102388, + "timestamp": "2025-09-10 02:28:47.709590", + "step": 6118, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 18035204324480 + }, + "timestamp": "2025-09-10 02:28:47.775156", + "step": 6118, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02475116029381752, + "timestamp": "2025-09-10 02:28:47.796645", + "step": 6119, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:47.827951", + "step": 6119, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.918137008440681e-05, + "timestamp": "2025-09-10 02:28:47.853222", + "step": 6120, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:47.885164", + "step": 6120, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.542963168001734e-05, + "timestamp": "2025-09-10 02:28:47.887509", + "step": 6121, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:47.917812", + "step": 6121, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.046116914600133896, + "timestamp": "2025-09-10 02:28:47.922199", + "step": 6122, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:47.954445", + "step": 6122, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0024645677767693996, + "timestamp": "2025-09-10 02:28:47.962071", + "step": 6123, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:47.992822", + "step": 6123, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003803297586273402, + "timestamp": "2025-09-10 02:28:48.018057", + "step": 6124, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:48.056437", + "step": 6124, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047259125858545303, + "timestamp": "2025-09-10 02:28:48.061808", + "step": 6125, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:48.093197", + "step": 6125, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037615117616951466, + "timestamp": "2025-09-10 02:28:48.097671", + "step": 6126, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:48.129051", + "step": 6126, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004323399916756898, + "timestamp": "2025-09-10 02:28:48.135875", + "step": 6127, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:48.172723", + "step": 6127, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0437922365963459, + "timestamp": "2025-09-10 02:28:48.201023", + "step": 6128, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:48.235490", + "step": 6128, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000819290173240006, + "timestamp": "2025-09-10 02:28:48.240886", + "step": 6129, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:48.280162", + "step": 6129, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019732918008230627, + "timestamp": "2025-09-10 02:28:48.292529", + "step": 6130, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:48.323914", + "step": 6130, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008840580121614039, + "timestamp": "2025-09-10 02:28:48.331410", + "step": 6131, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:48.361984", + "step": 6131, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.019107328727841377, + "timestamp": "2025-09-10 02:28:48.389830", + "step": 6132, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:48.421969", + "step": 6132, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.087460446404293e-05, + "timestamp": "2025-09-10 02:28:48.430257", + "step": 6133, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:48.462010", + "step": 6133, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008161481469869614, + "timestamp": "2025-09-10 02:28:48.469397", + "step": 6134, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:48.500501", + "step": 6134, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04065088555216789, + "timestamp": "2025-09-10 02:28:48.508237", + "step": 6135, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:48.539182", + "step": 6135, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001969260396435857, + "timestamp": "2025-09-10 02:28:48.567465", + "step": 6136, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:48.601560", + "step": 6136, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010869913967326283, + "timestamp": "2025-09-10 02:28:48.606358", + "step": 6137, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:48.637725", + "step": 6137, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005913428612984717, + "timestamp": "2025-09-10 02:28:48.642348", + "step": 6138, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:48.675095", + "step": 6138, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.689595466013998e-05, + "timestamp": "2025-09-10 02:28:48.682623", + "step": 6139, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:48.714074", + "step": 6139, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005156568717211485, + "timestamp": "2025-09-10 02:28:48.742463", + "step": 6140, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:28:48.774908", + "step": 6140, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014441793609876186, + "timestamp": "2025-09-10 02:28:48.784687", + "step": 6141, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:48.818569", + "step": 6141, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004379312158562243, + "timestamp": "2025-09-10 02:28:48.826274", + "step": 6142, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:48.859648", + "step": 6142, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012073171325027943, + "timestamp": "2025-09-10 02:28:48.866500", + "step": 6143, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:48.904904", + "step": 6143, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002663929190021008, + "timestamp": "2025-09-10 02:28:48.932951", + "step": 6144, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 19459015502528 + }, + "timestamp": "2025-09-10 02:28:48.986342", + "step": 6144, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007393588311970234, + "timestamp": "2025-09-10 02:28:49.010022", + "step": 6145, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:49.041968", + "step": 6145, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043535669101402164, + "timestamp": "2025-09-10 02:28:49.048697", + "step": 6146, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:49.080271", + "step": 6146, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002159666968509555, + "timestamp": "2025-09-10 02:28:49.087247", + "step": 6147, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:28:49.117573", + "step": 6147, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001196373486891389, + "timestamp": "2025-09-10 02:28:49.142819", + "step": 6148, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:49.173591", + "step": 6148, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02430625446140766, + "timestamp": "2025-09-10 02:28:49.182109", + "step": 6149, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:49.213236", + "step": 6149, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001197171164676547, + "timestamp": "2025-09-10 02:28:49.223428", + "step": 6150, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:49.254356", + "step": 6150, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013094481255393475, + "timestamp": "2025-09-10 02:28:49.258537", + "step": 6151, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:28:49.288607", + "step": 6151, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014958838000893593, + "timestamp": "2025-09-10 02:28:49.312699", + "step": 6152, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:49.344110", + "step": 6152, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031426880741491914, + "timestamp": "2025-09-10 02:28:49.348821", + "step": 6153, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:49.379500", + "step": 6153, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012687371345236897, + "timestamp": "2025-09-10 02:28:49.389987", + "step": 6154, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:49.421111", + "step": 6154, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014976828242652118, + "timestamp": "2025-09-10 02:28:49.432014", + "step": 6155, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:49.464403", + "step": 6155, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032478817738592625, + "timestamp": "2025-09-10 02:28:49.492166", + "step": 6156, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:49.523071", + "step": 6156, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014056127984076738, + "timestamp": "2025-09-10 02:28:49.528168", + "step": 6157, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:28:49.559237", + "step": 6157, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010582717368379235, + "timestamp": "2025-09-10 02:28:49.567068", + "step": 6158, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:49.599764", + "step": 6158, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013609221787191927, + "timestamp": "2025-09-10 02:28:49.606908", + "step": 6159, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:49.639359", + "step": 6159, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004430489207152277, + "timestamp": "2025-09-10 02:28:49.667476", + "step": 6160, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:49.699751", + "step": 6160, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001818513497710228, + "timestamp": "2025-09-10 02:28:49.707627", + "step": 6161, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:28:49.747955", + "step": 6161, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037555742892436683, + "timestamp": "2025-09-10 02:28:49.763907", + "step": 6162, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:49.796138", + "step": 6162, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007206489099189639, + "timestamp": "2025-09-10 02:28:49.808649", + "step": 6163, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:49.839676", + "step": 6163, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002763103402685374, + "timestamp": "2025-09-10 02:28:49.870777", + "step": 6164, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:28:49.902544", + "step": 6164, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009474234539084136, + "timestamp": "2025-09-10 02:28:49.907330", + "step": 6165, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:28:49.939488", + "step": 6165, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009898262796923518, + "timestamp": "2025-09-10 02:28:49.949704", + "step": 6166, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:49.980929", + "step": 6166, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000859494844917208, + "timestamp": "2025-09-10 02:28:49.988378", + "step": 6167, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:28:50.020605", + "step": 6167, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009193348814733326, + "timestamp": "2025-09-10 02:28:50.053933", + "step": 6168, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:50.093804", + "step": 6168, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004276874475181103, + "timestamp": "2025-09-10 02:28:50.102501", + "step": 6169, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:50.136920", + "step": 6169, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00454701716080308, + "timestamp": "2025-09-10 02:28:50.144309", + "step": 6170, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:28:50.175129", + "step": 6170, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004608361341524869, + "timestamp": "2025-09-10 02:28:50.186036", + "step": 6171, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:28:50.220449", + "step": 6171, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000643234234303236, + "timestamp": "2025-09-10 02:28:50.248738", + "step": 6172, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:28:50.281343", + "step": 6172, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001443680957891047, + "timestamp": "2025-09-10 02:28:50.285965", + "step": 6173, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:28:50.318501", + "step": 6173, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007593140471726656, + "timestamp": "2025-09-10 02:28:50.322577", + "step": 6174, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:29:00.483350", + "step": 6174, + "epoch": 3 + }, + { + "type": "pplx", + "content": 23762090.47420289, + "timestamp": "2025-09-10 02:29:00.489527", + "step": 6174, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:00.522250", + "step": 6174, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009576130541972816, + "timestamp": "2025-09-10 02:29:00.528475", + "step": 6175, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:00.565354", + "step": 6175, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006735025439411402, + "timestamp": "2025-09-10 02:29:00.593441", + "step": 6176, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:00.627618", + "step": 6176, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005353983025997877, + "timestamp": "2025-09-10 02:29:00.635299", + "step": 6177, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:00.669053", + "step": 6177, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004146324936300516, + "timestamp": "2025-09-10 02:29:00.673169", + "step": 6178, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:00.709091", + "step": 6178, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0026420990470796824, + "timestamp": "2025-09-10 02:29:00.714523", + "step": 6179, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:29:00.758352", + "step": 6179, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005170282907783985, + "timestamp": "2025-09-10 02:29:00.796546", + "step": 6180, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:29:00.836181", + "step": 6180, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003013778477907181, + "timestamp": "2025-09-10 02:29:00.851358", + "step": 6181, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:00.886755", + "step": 6181, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001864836667664349, + "timestamp": "2025-09-10 02:29:00.897162", + "step": 6182, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:00.935035", + "step": 6182, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007107080891728401, + "timestamp": "2025-09-10 02:29:00.946950", + "step": 6183, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:29:00.991051", + "step": 6183, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008580170688219368, + "timestamp": "2025-09-10 02:29:01.028951", + "step": 6184, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:01.064026", + "step": 6184, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007555230520665646, + "timestamp": "2025-09-10 02:29:01.068065", + "step": 6185, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 13289167064320 + }, + "timestamp": "2025-09-10 02:29:01.111953", + "step": 6185, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021088081412017345, + "timestamp": "2025-09-10 02:29:01.128296", + "step": 6186, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:01.162621", + "step": 6186, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008245863718912005, + "timestamp": "2025-09-10 02:29:01.169216", + "step": 6187, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:01.202455", + "step": 6187, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007418735884130001, + "timestamp": "2025-09-10 02:29:01.233410", + "step": 6188, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:01.266922", + "step": 6188, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027580276946537197, + "timestamp": "2025-09-10 02:29:01.275668", + "step": 6189, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:01.309893", + "step": 6189, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014374948805198073, + "timestamp": "2025-09-10 02:29:01.320644", + "step": 6190, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:01.353527", + "step": 6190, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003637855697888881, + "timestamp": "2025-09-10 02:29:01.357634", + "step": 6191, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:01.390955", + "step": 6191, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003992599435150623, + "timestamp": "2025-09-10 02:29:01.423392", + "step": 6192, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:01.457871", + "step": 6192, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.350113886175677e-05, + "timestamp": "2025-09-10 02:29:01.461906", + "step": 6193, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:01.496563", + "step": 6193, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022764094173908234, + "timestamp": "2025-09-10 02:29:01.507981", + "step": 6194, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:01.541893", + "step": 6194, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012956062331795692, + "timestamp": "2025-09-10 02:29:01.551989", + "step": 6195, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:01.586391", + "step": 6195, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000641929917037487, + "timestamp": "2025-09-10 02:29:01.617664", + "step": 6196, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:01.651653", + "step": 6196, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020424830727279186, + "timestamp": "2025-09-10 02:29:01.661198", + "step": 6197, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:01.693355", + "step": 6197, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005121281137689948, + "timestamp": "2025-09-10 02:29:01.699969", + "step": 6198, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:01.732326", + "step": 6198, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009792317869141698, + "timestamp": "2025-09-10 02:29:01.736720", + "step": 6199, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:01.770056", + "step": 6199, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007432979182340205, + "timestamp": "2025-09-10 02:29:01.795525", + "step": 6200, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:01.830760", + "step": 6200, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006474620313383639, + "timestamp": "2025-09-10 02:29:01.835262", + "step": 6201, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:01.868661", + "step": 6201, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018694715108722448, + "timestamp": "2025-09-10 02:29:01.873579", + "step": 6202, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:01.904865", + "step": 6202, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001604403369128704, + "timestamp": "2025-09-10 02:29:01.909029", + "step": 6203, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:29:01.954060", + "step": 6203, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.979109533247538e-05, + "timestamp": "2025-09-10 02:29:01.988965", + "step": 6204, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:29:02.020146", + "step": 6204, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006631503812968731, + "timestamp": "2025-09-10 02:29:02.023430", + "step": 6205, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:02.057417", + "step": 6205, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003117796266451478, + "timestamp": "2025-09-10 02:29:02.064614", + "step": 6206, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:02.098793", + "step": 6206, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004380426835268736, + "timestamp": "2025-09-10 02:29:02.108753", + "step": 6207, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:02.141675", + "step": 6207, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.016728242859244347, + "timestamp": "2025-09-10 02:29:02.170150", + "step": 6208, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:02.199959", + "step": 6208, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002014095662161708, + "timestamp": "2025-09-10 02:29:02.202448", + "step": 6209, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:02.238990", + "step": 6209, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007832984556443989, + "timestamp": "2025-09-10 02:29:02.248922", + "step": 6210, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:02.283798", + "step": 6210, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007357418653555214, + "timestamp": "2025-09-10 02:29:02.294566", + "step": 6211, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:02.329738", + "step": 6211, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02407163567841053, + "timestamp": "2025-09-10 02:29:02.369183", + "step": 6212, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:02.402828", + "step": 6212, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.155436808010563e-05, + "timestamp": "2025-09-10 02:29:02.407820", + "step": 6213, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:02.448483", + "step": 6213, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003762414853554219, + "timestamp": "2025-09-10 02:29:02.453924", + "step": 6214, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:02.492975", + "step": 6214, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001163300359621644, + "timestamp": "2025-09-10 02:29:02.500308", + "step": 6215, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:02.540491", + "step": 6215, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011203320696949959, + "timestamp": "2025-09-10 02:29:02.571357", + "step": 6216, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:02.606934", + "step": 6216, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006556047010235488, + "timestamp": "2025-09-10 02:29:02.611843", + "step": 6217, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:29:02.656645", + "step": 6217, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007842601626180112, + "timestamp": "2025-09-10 02:29:02.674001", + "step": 6218, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:02.707892", + "step": 6218, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004893920267932117, + "timestamp": "2025-09-10 02:29:02.714573", + "step": 6219, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:02.750107", + "step": 6219, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008870299207046628, + "timestamp": "2025-09-10 02:29:02.778290", + "step": 6220, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:02.810971", + "step": 6220, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0022531996946781874, + "timestamp": "2025-09-10 02:29:02.815289", + "step": 6221, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:29:02.858836", + "step": 6221, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007312820525839925, + "timestamp": "2025-09-10 02:29:02.874928", + "step": 6222, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:02.912150", + "step": 6222, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002016266662394628, + "timestamp": "2025-09-10 02:29:02.922542", + "step": 6223, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:02.958404", + "step": 6223, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006459229625761509, + "timestamp": "2025-09-10 02:29:02.986941", + "step": 6224, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:03.019933", + "step": 6224, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004273521772120148, + "timestamp": "2025-09-10 02:29:03.024404", + "step": 6225, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:03.059004", + "step": 6225, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010626261064317077, + "timestamp": "2025-09-10 02:29:03.065725", + "step": 6226, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:03.101442", + "step": 6226, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011228015646338463, + "timestamp": "2025-09-10 02:29:03.108686", + "step": 6227, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:03.147917", + "step": 6227, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005814795149490237, + "timestamp": "2025-09-10 02:29:03.176118", + "step": 6228, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:03.212972", + "step": 6228, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.321633408224443e-05, + "timestamp": "2025-09-10 02:29:03.218168", + "step": 6229, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:03.262762", + "step": 6229, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002059446182101965, + "timestamp": "2025-09-10 02:29:03.274682", + "step": 6230, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:03.307936", + "step": 6230, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014323127688840032, + "timestamp": "2025-09-10 02:29:03.315020", + "step": 6231, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:03.347935", + "step": 6231, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002452459593769163, + "timestamp": "2025-09-10 02:29:03.378692", + "step": 6232, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:03.413946", + "step": 6232, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.01287010032683611, + "timestamp": "2025-09-10 02:29:03.423501", + "step": 6233, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:03.463502", + "step": 6233, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.2527696021134034e-05, + "timestamp": "2025-09-10 02:29:03.470318", + "step": 6234, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:03.508243", + "step": 6234, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00437973951920867, + "timestamp": "2025-09-10 02:29:03.515668", + "step": 6235, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:03.552354", + "step": 6235, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010270438157022, + "timestamp": "2025-09-10 02:29:03.578323", + "step": 6236, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:03.612892", + "step": 6236, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011186213669134304, + "timestamp": "2025-09-10 02:29:03.619445", + "step": 6237, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:03.654461", + "step": 6237, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016674351354595274, + "timestamp": "2025-09-10 02:29:03.661753", + "step": 6238, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:03.706916", + "step": 6238, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005096830427646637, + "timestamp": "2025-09-10 02:29:03.714335", + "step": 6239, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:29:03.750293", + "step": 6239, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.230676030507311e-05, + "timestamp": "2025-09-10 02:29:03.777459", + "step": 6240, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:29:03.816710", + "step": 6240, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013677349488716573, + "timestamp": "2025-09-10 02:29:03.832166", + "step": 6241, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:03.868850", + "step": 6241, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.3667438654229045e-05, + "timestamp": "2025-09-10 02:29:03.880696", + "step": 6242, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:03.915934", + "step": 6242, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017127035243902355, + "timestamp": "2025-09-10 02:29:03.922666", + "step": 6243, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:03.957512", + "step": 6243, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002690852852538228, + "timestamp": "2025-09-10 02:29:03.985803", + "step": 6244, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:04.022765", + "step": 6244, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017910164024215192, + "timestamp": "2025-09-10 02:29:04.030531", + "step": 6245, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:04.066882", + "step": 6245, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005587777122855186, + "timestamp": "2025-09-10 02:29:04.074089", + "step": 6246, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:29:04.118109", + "step": 6246, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008401383529417217, + "timestamp": "2025-09-10 02:29:04.135472", + "step": 6247, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:04.170553", + "step": 6247, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020682509057223797, + "timestamp": "2025-09-10 02:29:04.201548", + "step": 6248, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:04.236004", + "step": 6248, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007200734107755125, + "timestamp": "2025-09-10 02:29:04.239906", + "step": 6249, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:04.272787", + "step": 6249, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019148035789839923, + "timestamp": "2025-09-10 02:29:04.279772", + "step": 6250, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:04.315061", + "step": 6250, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.394280040287413e-05, + "timestamp": "2025-09-10 02:29:04.317814", + "step": 6251, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:04.351509", + "step": 6251, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002166307531297207, + "timestamp": "2025-09-10 02:29:04.377873", + "step": 6252, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:04.421383", + "step": 6252, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003251233429182321, + "timestamp": "2025-09-10 02:29:04.428899", + "step": 6253, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:04.467402", + "step": 6253, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010890467092394829, + "timestamp": "2025-09-10 02:29:04.474674", + "step": 6254, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:04.509098", + "step": 6254, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005082925199531019, + "timestamp": "2025-09-10 02:29:04.511914", + "step": 6255, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:04.545351", + "step": 6255, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006019308930262923, + "timestamp": "2025-09-10 02:29:04.576810", + "step": 6256, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:04.613745", + "step": 6256, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034762476570904255, + "timestamp": "2025-09-10 02:29:04.617911", + "step": 6257, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:04.650328", + "step": 6257, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011139985144836828, + "timestamp": "2025-09-10 02:29:04.660196", + "step": 6258, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:29:04.698657", + "step": 6258, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037154555320739746, + "timestamp": "2025-09-10 02:29:04.712334", + "step": 6259, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:04.746352", + "step": 6259, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00046982159256003797, + "timestamp": "2025-09-10 02:29:04.773897", + "step": 6260, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:04.804817", + "step": 6260, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027465628227218986, + "timestamp": "2025-09-10 02:29:04.807534", + "step": 6261, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:04.838605", + "step": 6261, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002272390847792849, + "timestamp": "2025-09-10 02:29:04.848663", + "step": 6262, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:04.880669", + "step": 6262, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003084797062911093, + "timestamp": "2025-09-10 02:29:04.888316", + "step": 6263, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:04.924069", + "step": 6263, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007478706538677216, + "timestamp": "2025-09-10 02:29:04.949180", + "step": 6264, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:04.983852", + "step": 6264, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004629208124242723, + "timestamp": "2025-09-10 02:29:04.992116", + "step": 6265, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:05.025256", + "step": 6265, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001887015241663903, + "timestamp": "2025-09-10 02:29:05.029368", + "step": 6266, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:05.062885", + "step": 6266, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001650593476369977, + "timestamp": "2025-09-10 02:29:05.069594", + "step": 6267, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:05.102767", + "step": 6267, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003856061666738242, + "timestamp": "2025-09-10 02:29:05.130373", + "step": 6268, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:05.164244", + "step": 6268, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007830560207366943, + "timestamp": "2025-09-10 02:29:05.166809", + "step": 6269, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:05.203631", + "step": 6269, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001599523238837719, + "timestamp": "2025-09-10 02:29:05.216216", + "step": 6270, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:05.249646", + "step": 6270, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012749488232657313, + "timestamp": "2025-09-10 02:29:05.257268", + "step": 6271, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:05.292066", + "step": 6271, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0036235000006854534, + "timestamp": "2025-09-10 02:29:05.321219", + "step": 6272, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:05.365793", + "step": 6272, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004903791705146432, + "timestamp": "2025-09-10 02:29:05.369874", + "step": 6273, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:05.402955", + "step": 6273, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.18557057552971e-05, + "timestamp": "2025-09-10 02:29:05.409791", + "step": 6274, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:05.443844", + "step": 6274, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.8820067351916805e-05, + "timestamp": "2025-09-10 02:29:05.455625", + "step": 6275, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:05.491242", + "step": 6275, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001387560332659632, + "timestamp": "2025-09-10 02:29:05.524569", + "step": 6276, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:05.557513", + "step": 6276, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.674280252307653e-05, + "timestamp": "2025-09-10 02:29:05.561570", + "step": 6277, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:05.597546", + "step": 6277, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016429023817181587, + "timestamp": "2025-09-10 02:29:05.602308", + "step": 6278, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:05.635431", + "step": 6278, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003711978788487613, + "timestamp": "2025-09-10 02:29:05.642513", + "step": 6279, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:05.678251", + "step": 6279, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0036703094374388456, + "timestamp": "2025-09-10 02:29:05.706203", + "step": 6280, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:05.739233", + "step": 6280, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004256887186784297, + "timestamp": "2025-09-10 02:29:05.747299", + "step": 6281, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:05.780857", + "step": 6281, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036882911808788776, + "timestamp": "2025-09-10 02:29:05.788219", + "step": 6282, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:29:05.826181", + "step": 6282, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006809058249928057, + "timestamp": "2025-09-10 02:29:05.839544", + "step": 6283, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:05.874700", + "step": 6283, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0020839818753302097, + "timestamp": "2025-09-10 02:29:05.899681", + "step": 6284, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:05.934688", + "step": 6284, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040493832784704864, + "timestamp": "2025-09-10 02:29:05.940112", + "step": 6285, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:29:05.978214", + "step": 6285, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.699164027348161e-05, + "timestamp": "2025-09-10 02:29:05.991891", + "step": 6286, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:06.026523", + "step": 6286, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018825959705282003, + "timestamp": "2025-09-10 02:29:06.038487", + "step": 6287, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:06.072604", + "step": 6287, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004126779385842383, + "timestamp": "2025-09-10 02:29:06.097564", + "step": 6288, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:06.128927", + "step": 6288, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02210475504398346, + "timestamp": "2025-09-10 02:29:06.131446", + "step": 6289, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:06.164947", + "step": 6289, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003806811582762748, + "timestamp": "2025-09-10 02:29:06.171748", + "step": 6290, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:06.203970", + "step": 6290, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002564129186794162, + "timestamp": "2025-09-10 02:29:06.208366", + "step": 6291, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:06.240294", + "step": 6291, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007361977477557957, + "timestamp": "2025-09-10 02:29:06.268273", + "step": 6292, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:29:06.299459", + "step": 6292, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.1402661736356094e-05, + "timestamp": "2025-09-10 02:29:06.301852", + "step": 6293, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:06.333635", + "step": 6293, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.298778341966681e-05, + "timestamp": "2025-09-10 02:29:06.340760", + "step": 6294, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:06.373775", + "step": 6294, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04112233966588974, + "timestamp": "2025-09-10 02:29:06.381249", + "step": 6295, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:06.413683", + "step": 6295, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010367208160459995, + "timestamp": "2025-09-10 02:29:06.441519", + "step": 6296, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:06.473067", + "step": 6296, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000295735226245597, + "timestamp": "2025-09-10 02:29:06.482727", + "step": 6297, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:06.515480", + "step": 6297, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019989509019069374, + "timestamp": "2025-09-10 02:29:06.522560", + "step": 6298, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:06.553317", + "step": 6298, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.843276737025008e-05, + "timestamp": "2025-09-10 02:29:06.564281", + "step": 6299, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:06.596144", + "step": 6299, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021590027608908713, + "timestamp": "2025-09-10 02:29:06.629593", + "step": 6300, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:06.661426", + "step": 6300, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.1329996495041996e-05, + "timestamp": "2025-09-10 02:29:06.665540", + "step": 6301, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:06.698119", + "step": 6301, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0038885578978806734, + "timestamp": "2025-09-10 02:29:06.707892", + "step": 6302, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:06.740805", + "step": 6302, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019760747090913355, + "timestamp": "2025-09-10 02:29:06.747974", + "step": 6303, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:06.780375", + "step": 6303, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.984067785087973e-05, + "timestamp": "2025-09-10 02:29:06.811653", + "step": 6304, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:06.843758", + "step": 6304, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008169592474587262, + "timestamp": "2025-09-10 02:29:06.851168", + "step": 6305, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:06.882525", + "step": 6305, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003005763574037701, + "timestamp": "2025-09-10 02:29:06.890220", + "step": 6306, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:29:06.921053", + "step": 6306, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.076990681933239e-05, + "timestamp": "2025-09-10 02:29:06.923924", + "step": 6307, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:06.955910", + "step": 6307, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.251143375062384e-05, + "timestamp": "2025-09-10 02:29:06.988551", + "step": 6308, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:07.020723", + "step": 6308, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.822443487588316e-05, + "timestamp": "2025-09-10 02:29:07.025564", + "step": 6309, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:07.056508", + "step": 6309, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012206012615934014, + "timestamp": "2025-09-10 02:29:07.059078", + "step": 6310, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:07.092205", + "step": 6310, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.703573308186606e-05, + "timestamp": "2025-09-10 02:29:07.102240", + "step": 6311, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:07.134448", + "step": 6311, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.877570897107944e-05, + "timestamp": "2025-09-10 02:29:07.165292", + "step": 6312, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:07.197350", + "step": 6312, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010713493247749284, + "timestamp": "2025-09-10 02:29:07.202384", + "step": 6313, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:29:07.235524", + "step": 6313, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.199342609150335e-05, + "timestamp": "2025-09-10 02:29:07.248852", + "step": 6314, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:07.280036", + "step": 6314, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02364686317741871, + "timestamp": "2025-09-10 02:29:07.283883", + "step": 6315, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:07.315780", + "step": 6315, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002355035103391856, + "timestamp": "2025-09-10 02:29:07.348371", + "step": 6316, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:07.383001", + "step": 6316, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014525903679896146, + "timestamp": "2025-09-10 02:29:07.388034", + "step": 6317, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:07.419491", + "step": 6317, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019105462706647813, + "timestamp": "2025-09-10 02:29:07.431764", + "step": 6318, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:07.463177", + "step": 6318, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010136031778529286, + "timestamp": "2025-09-10 02:29:07.469800", + "step": 6319, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:07.500713", + "step": 6319, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030389634775929153, + "timestamp": "2025-09-10 02:29:07.528449", + "step": 6320, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:07.559425", + "step": 6320, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005676125292666256, + "timestamp": "2025-09-10 02:29:07.564296", + "step": 6321, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:29:18.130853", + "step": 6321, + "epoch": 3 + }, + { + "type": "pplx", + "content": 22779605.39083871, + "timestamp": "2025-09-10 02:29:18.134652", + "step": 6321, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:18.167347", + "step": 6321, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000332222378347069, + "timestamp": "2025-09-10 02:29:18.173098", + "step": 6322, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:18.205647", + "step": 6322, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003474770812317729, + "timestamp": "2025-09-10 02:29:18.209511", + "step": 6323, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:18.240090", + "step": 6323, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007376385619863868, + "timestamp": "2025-09-10 02:29:18.267870", + "step": 6324, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:18.306032", + "step": 6324, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.021130096167325974, + "timestamp": "2025-09-10 02:29:18.310959", + "step": 6325, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:18.343599", + "step": 6325, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008156650001183152, + "timestamp": "2025-09-10 02:29:18.351079", + "step": 6326, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:29:18.391999", + "step": 6326, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.109603105462156e-05, + "timestamp": "2025-09-10 02:29:18.405697", + "step": 6327, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:29:18.445889", + "step": 6327, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003445267793722451, + "timestamp": "2025-09-10 02:29:18.482728", + "step": 6328, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:18.517430", + "step": 6328, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021596229635179043, + "timestamp": "2025-09-10 02:29:18.522150", + "step": 6329, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:18.554164", + "step": 6329, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005771416472271085, + "timestamp": "2025-09-10 02:29:18.564176", + "step": 6330, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:18.602787", + "step": 6330, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002720944758038968, + "timestamp": "2025-09-10 02:29:18.609250", + "step": 6331, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:18.640525", + "step": 6331, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016264248406514525, + "timestamp": "2025-09-10 02:29:18.669868", + "step": 6332, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:18.701530", + "step": 6332, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010872381972149014, + "timestamp": "2025-09-10 02:29:18.711391", + "step": 6333, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:18.743000", + "step": 6333, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038386922096833587, + "timestamp": "2025-09-10 02:29:18.749953", + "step": 6334, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:18.780151", + "step": 6334, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001077132037607953, + "timestamp": "2025-09-10 02:29:18.787758", + "step": 6335, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:18.819772", + "step": 6335, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004933988093398511, + "timestamp": "2025-09-10 02:29:18.853286", + "step": 6336, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:18.885241", + "step": 6336, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017707353690639138, + "timestamp": "2025-09-10 02:29:18.890646", + "step": 6337, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:18.921242", + "step": 6337, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00026189981144852936, + "timestamp": "2025-09-10 02:29:18.929151", + "step": 6338, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:18.960376", + "step": 6338, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.07036115974187851, + "timestamp": "2025-09-10 02:29:18.967898", + "step": 6339, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:19.000684", + "step": 6339, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025524134980514646, + "timestamp": "2025-09-10 02:29:19.033580", + "step": 6340, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:19.063853", + "step": 6340, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033761485246941447, + "timestamp": "2025-09-10 02:29:19.068626", + "step": 6341, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:19.099684", + "step": 6341, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010502010118216276, + "timestamp": "2025-09-10 02:29:19.104150", + "step": 6342, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:19.136087", + "step": 6342, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003272096801083535, + "timestamp": "2025-09-10 02:29:19.143049", + "step": 6343, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:19.174394", + "step": 6343, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.662676368141547e-05, + "timestamp": "2025-09-10 02:29:19.205407", + "step": 6344, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:19.235942", + "step": 6344, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015760198584757745, + "timestamp": "2025-09-10 02:29:19.238088", + "step": 6345, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:19.269413", + "step": 6345, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003509795351419598, + "timestamp": "2025-09-10 02:29:19.279455", + "step": 6346, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:19.309940", + "step": 6346, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002398541197180748, + "timestamp": "2025-09-10 02:29:19.316540", + "step": 6347, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:19.347416", + "step": 6347, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.516561552416533e-05, + "timestamp": "2025-09-10 02:29:19.378249", + "step": 6348, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:19.408493", + "step": 6348, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.48638849472627e-05, + "timestamp": "2025-09-10 02:29:19.410610", + "step": 6349, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:19.441360", + "step": 6349, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.105393989244476e-05, + "timestamp": "2025-09-10 02:29:19.445757", + "step": 6350, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:19.477340", + "step": 6350, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025512793217785656, + "timestamp": "2025-09-10 02:29:19.484997", + "step": 6351, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:19.515787", + "step": 6351, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005597950075753033, + "timestamp": "2025-09-10 02:29:19.540639", + "step": 6352, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:19.571736", + "step": 6352, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011579144047573209, + "timestamp": "2025-09-10 02:29:19.575426", + "step": 6353, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:19.607848", + "step": 6353, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.454350609099492e-05, + "timestamp": "2025-09-10 02:29:19.615373", + "step": 6354, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:19.647231", + "step": 6354, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.9956550103379413e-05, + "timestamp": "2025-09-10 02:29:19.656938", + "step": 6355, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:19.687539", + "step": 6355, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.7725461879745126e-05, + "timestamp": "2025-09-10 02:29:19.715332", + "step": 6356, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:19.748381", + "step": 6356, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045506874448619783, + "timestamp": "2025-09-10 02:29:19.757975", + "step": 6357, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:19.789077", + "step": 6357, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005231906659901142, + "timestamp": "2025-09-10 02:29:19.795938", + "step": 6358, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:19.827714", + "step": 6358, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005539475823752582, + "timestamp": "2025-09-10 02:29:19.834445", + "step": 6359, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:19.865423", + "step": 6359, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00410681264474988, + "timestamp": "2025-09-10 02:29:19.893284", + "step": 6360, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:19.923869", + "step": 6360, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008158148848451674, + "timestamp": "2025-09-10 02:29:19.929029", + "step": 6361, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:29:19.963810", + "step": 6361, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011450514430180192, + "timestamp": "2025-09-10 02:29:19.977531", + "step": 6362, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:20.009029", + "step": 6362, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005734324222430587, + "timestamp": "2025-09-10 02:29:20.012932", + "step": 6363, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:20.044053", + "step": 6363, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.529059414286166e-05, + "timestamp": "2025-09-10 02:29:20.072584", + "step": 6364, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:20.105593", + "step": 6364, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.022070255130529404, + "timestamp": "2025-09-10 02:29:20.110631", + "step": 6365, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:20.150185", + "step": 6365, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003581468772608787, + "timestamp": "2025-09-10 02:29:20.157700", + "step": 6366, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:29:20.202522", + "step": 6366, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.015283132903277874, + "timestamp": "2025-09-10 02:29:20.218121", + "step": 6367, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:20.249342", + "step": 6367, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029461071244440973, + "timestamp": "2025-09-10 02:29:20.277746", + "step": 6368, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:20.310632", + "step": 6368, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010076651349663734, + "timestamp": "2025-09-10 02:29:20.315613", + "step": 6369, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:20.346636", + "step": 6369, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0035657952539622784, + "timestamp": "2025-09-10 02:29:20.353572", + "step": 6370, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:29:20.391678", + "step": 6370, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010319511406123638, + "timestamp": "2025-09-10 02:29:20.407379", + "step": 6371, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:29:20.448636", + "step": 6371, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019968757987953722, + "timestamp": "2025-09-10 02:29:20.486664", + "step": 6372, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:20.517082", + "step": 6372, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001550498272990808, + "timestamp": "2025-09-10 02:29:20.521556", + "step": 6373, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:20.552236", + "step": 6373, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003731503675226122, + "timestamp": "2025-09-10 02:29:20.559068", + "step": 6374, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:20.589976", + "step": 6374, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011220359738217667, + "timestamp": "2025-09-10 02:29:20.602529", + "step": 6375, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:20.636903", + "step": 6375, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002799866779241711, + "timestamp": "2025-09-10 02:29:20.670381", + "step": 6376, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 14712978242368 + }, + "timestamp": "2025-09-10 02:29:20.709953", + "step": 6376, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003151228418573737, + "timestamp": "2025-09-10 02:29:20.727181", + "step": 6377, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:20.758329", + "step": 6377, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033930037170648575, + "timestamp": "2025-09-10 02:29:20.770815", + "step": 6378, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:20.801897", + "step": 6378, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.999308243393898e-05, + "timestamp": "2025-09-10 02:29:20.808629", + "step": 6379, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:20.839608", + "step": 6379, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043307337909936905, + "timestamp": "2025-09-10 02:29:20.864987", + "step": 6380, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:20.895725", + "step": 6380, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034244899870827794, + "timestamp": "2025-09-10 02:29:20.898013", + "step": 6381, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:20.933233", + "step": 6381, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011802476365119219, + "timestamp": "2025-09-10 02:29:20.945269", + "step": 6382, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:20.982320", + "step": 6382, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002502583956811577, + "timestamp": "2025-09-10 02:29:20.993226", + "step": 6383, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:21.030608", + "step": 6383, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013230193872004747, + "timestamp": "2025-09-10 02:29:21.058653", + "step": 6384, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:21.093631", + "step": 6384, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0048297131434082985, + "timestamp": "2025-09-10 02:29:21.096468", + "step": 6385, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:21.129065", + "step": 6385, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.7518630556878634e-05, + "timestamp": "2025-09-10 02:29:21.134528", + "step": 6386, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:21.166246", + "step": 6386, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001769508671713993, + "timestamp": "2025-09-10 02:29:21.173212", + "step": 6387, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:21.205453", + "step": 6387, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.868801185395569e-05, + "timestamp": "2025-09-10 02:29:21.236430", + "step": 6388, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:21.272399", + "step": 6388, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004954601754434407, + "timestamp": "2025-09-10 02:29:21.281350", + "step": 6389, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:21.315513", + "step": 6389, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008721998310647905, + "timestamp": "2025-09-10 02:29:21.327525", + "step": 6390, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:21.361599", + "step": 6390, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.93388457573019e-05, + "timestamp": "2025-09-10 02:29:21.368612", + "step": 6391, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:21.400136", + "step": 6391, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.254738400457427e-05, + "timestamp": "2025-09-10 02:29:21.424203", + "step": 6392, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:21.458021", + "step": 6392, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006808959878981113, + "timestamp": "2025-09-10 02:29:21.466527", + "step": 6393, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:29:21.508602", + "step": 6393, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00033295477624051273, + "timestamp": "2025-09-10 02:29:21.524809", + "step": 6394, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:21.557536", + "step": 6394, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014583735028281808, + "timestamp": "2025-09-10 02:29:21.565081", + "step": 6395, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:29:21.603694", + "step": 6395, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000546331750229001, + "timestamp": "2025-09-10 02:29:21.638539", + "step": 6396, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:21.673034", + "step": 6396, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00026958558009937406, + "timestamp": "2025-09-10 02:29:21.676535", + "step": 6397, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:29:21.712740", + "step": 6397, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034523935755714774, + "timestamp": "2025-09-10 02:29:21.726607", + "step": 6398, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:21.761790", + "step": 6398, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016174910706467927, + "timestamp": "2025-09-10 02:29:21.771121", + "step": 6399, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:21.805907", + "step": 6399, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008864761330187321, + "timestamp": "2025-09-10 02:29:21.833966", + "step": 6400, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:21.865231", + "step": 6400, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001508051936980337, + "timestamp": "2025-09-10 02:29:21.869796", + "step": 6401, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:29:21.906240", + "step": 6401, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0057227760553359985, + "timestamp": "2025-09-10 02:29:21.919569", + "step": 6402, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:21.951281", + "step": 6402, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02214057371020317, + "timestamp": "2025-09-10 02:29:21.963275", + "step": 6403, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:22.004341", + "step": 6403, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005699954344891012, + "timestamp": "2025-09-10 02:29:22.035455", + "step": 6404, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:22.066677", + "step": 6404, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000413557660067454, + "timestamp": "2025-09-10 02:29:22.069007", + "step": 6405, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:22.099892", + "step": 6405, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027196184964850545, + "timestamp": "2025-09-10 02:29:22.107338", + "step": 6406, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:22.144101", + "step": 6406, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011958154937019572, + "timestamp": "2025-09-10 02:29:22.151018", + "step": 6407, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:22.182768", + "step": 6407, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002388076245551929, + "timestamp": "2025-09-10 02:29:22.206859", + "step": 6408, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:22.238367", + "step": 6408, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002621088642627001, + "timestamp": "2025-09-10 02:29:22.246894", + "step": 6409, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:22.278944", + "step": 6409, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003606698883231729, + "timestamp": "2025-09-10 02:29:22.289087", + "step": 6410, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:22.323509", + "step": 6410, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.006846979726105928, + "timestamp": "2025-09-10 02:29:22.335043", + "step": 6411, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:22.369704", + "step": 6411, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006495703128166497, + "timestamp": "2025-09-10 02:29:22.398294", + "step": 6412, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:29:22.431201", + "step": 6412, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001449008472263813, + "timestamp": "2025-09-10 02:29:22.444220", + "step": 6413, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:29:22.482765", + "step": 6413, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04917486757040024, + "timestamp": "2025-09-10 02:29:22.498676", + "step": 6414, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:22.529859", + "step": 6414, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042798795038834214, + "timestamp": "2025-09-10 02:29:22.537243", + "step": 6415, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:22.568292", + "step": 6415, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018693515448831022, + "timestamp": "2025-09-10 02:29:22.593097", + "step": 6416, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:29:22.625685", + "step": 6416, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015946182247716933, + "timestamp": "2025-09-10 02:29:22.638362", + "step": 6417, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:22.669691", + "step": 6417, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.3737265766831115e-05, + "timestamp": "2025-09-10 02:29:22.679917", + "step": 6418, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:22.712098", + "step": 6418, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.317730680573732e-05, + "timestamp": "2025-09-10 02:29:22.722150", + "step": 6419, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:22.753156", + "step": 6419, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010926821269094944, + "timestamp": "2025-09-10 02:29:22.781553", + "step": 6420, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:22.813347", + "step": 6420, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013701205898541957, + "timestamp": "2025-09-10 02:29:22.817704", + "step": 6421, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:22.850116", + "step": 6421, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004380210011731833, + "timestamp": "2025-09-10 02:29:22.862568", + "step": 6422, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:22.893942", + "step": 6422, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00029581101262010634, + "timestamp": "2025-09-10 02:29:22.901232", + "step": 6423, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:22.932650", + "step": 6423, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028576585464179516, + "timestamp": "2025-09-10 02:29:22.963633", + "step": 6424, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:22.994670", + "step": 6424, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003905796620529145, + "timestamp": "2025-09-10 02:29:22.999676", + "step": 6425, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:23.030989", + "step": 6425, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002705628867261112, + "timestamp": "2025-09-10 02:29:23.038009", + "step": 6426, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:29:23.072734", + "step": 6426, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02130054123699665, + "timestamp": "2025-09-10 02:29:23.086423", + "step": 6427, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:23.118615", + "step": 6427, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003769658214878291, + "timestamp": "2025-09-10 02:29:23.146259", + "step": 6428, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:23.180368", + "step": 6428, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011719001486198977, + "timestamp": "2025-09-10 02:29:23.182825", + "step": 6429, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:23.214238", + "step": 6429, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0029267126228660345, + "timestamp": "2025-09-10 02:29:23.221867", + "step": 6430, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:23.252492", + "step": 6430, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013307588233146816, + "timestamp": "2025-09-10 02:29:23.255084", + "step": 6431, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 15662185694400 + }, + "timestamp": "2025-09-10 02:29:23.300832", + "step": 6431, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002167918864870444, + "timestamp": "2025-09-10 02:29:23.340946", + "step": 6432, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:23.380369", + "step": 6432, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011096680100308731, + "timestamp": "2025-09-10 02:29:23.384766", + "step": 6433, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:23.424786", + "step": 6433, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.910169031471014e-05, + "timestamp": "2025-09-10 02:29:23.437350", + "step": 6434, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:29:23.470228", + "step": 6434, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004474697168916464, + "timestamp": "2025-09-10 02:29:23.475217", + "step": 6435, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:23.510567", + "step": 6435, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006678365753032267, + "timestamp": "2025-09-10 02:29:23.542157", + "step": 6436, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:23.583432", + "step": 6436, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005918839015066624, + "timestamp": "2025-09-10 02:29:23.586403", + "step": 6437, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:29:23.626467", + "step": 6437, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001482577354181558, + "timestamp": "2025-09-10 02:29:23.640472", + "step": 6438, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:23.677715", + "step": 6438, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014806709077674896, + "timestamp": "2025-09-10 02:29:23.687915", + "step": 6439, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:23.725397", + "step": 6439, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005189132643863559, + "timestamp": "2025-09-10 02:29:23.754067", + "step": 6440, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:29:23.787062", + "step": 6440, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017340357590001076, + "timestamp": "2025-09-10 02:29:23.800043", + "step": 6441, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:23.831000", + "step": 6441, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.030487949028611183, + "timestamp": "2025-09-10 02:29:23.838852", + "step": 6442, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:23.869448", + "step": 6442, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.026207389310002327, + "timestamp": "2025-09-10 02:29:23.876351", + "step": 6443, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:23.907430", + "step": 6443, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012488577340263873, + "timestamp": "2025-09-10 02:29:23.931293", + "step": 6444, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:23.961966", + "step": 6444, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043719136738218367, + "timestamp": "2025-09-10 02:29:23.964116", + "step": 6445, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:24.004712", + "step": 6445, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001817305019358173, + "timestamp": "2025-09-10 02:29:24.011635", + "step": 6446, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:29:24.052007", + "step": 6446, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037571805296465755, + "timestamp": "2025-09-10 02:29:24.067930", + "step": 6447, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:24.099761", + "step": 6447, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024322188983205706, + "timestamp": "2025-09-10 02:29:24.125144", + "step": 6448, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:24.157642", + "step": 6448, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034805149771273136, + "timestamp": "2025-09-10 02:29:24.163103", + "step": 6449, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:24.193931", + "step": 6449, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.158596756402403e-05, + "timestamp": "2025-09-10 02:29:24.201350", + "step": 6450, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:24.231856", + "step": 6450, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003348083992023021, + "timestamp": "2025-09-10 02:29:24.235749", + "step": 6451, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:24.266402", + "step": 6451, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001363294431939721, + "timestamp": "2025-09-10 02:29:24.294172", + "step": 6452, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:29:24.325874", + "step": 6452, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007565673440694809, + "timestamp": "2025-09-10 02:29:24.338554", + "step": 6453, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:24.370268", + "step": 6453, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.2014875614549965e-05, + "timestamp": "2025-09-10 02:29:24.378144", + "step": 6454, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:24.408859", + "step": 6454, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.36865438031964e-05, + "timestamp": "2025-09-10 02:29:24.416537", + "step": 6455, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:24.447271", + "step": 6455, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034000244340859354, + "timestamp": "2025-09-10 02:29:24.472591", + "step": 6456, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 608 + ], + "flops": 18035204324480 + }, + "timestamp": "2025-09-10 02:29:24.522668", + "step": 6456, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011615986004471779, + "timestamp": "2025-09-10 02:29:24.544241", + "step": 6457, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:24.576145", + "step": 6457, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007336985436268151, + "timestamp": "2025-09-10 02:29:24.578766", + "step": 6458, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:24.610449", + "step": 6458, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.4305074475705624e-05, + "timestamp": "2025-09-10 02:29:24.622480", + "step": 6459, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:24.655440", + "step": 6459, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005083387950435281, + "timestamp": "2025-09-10 02:29:24.680290", + "step": 6460, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:24.711201", + "step": 6460, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002868285810109228, + "timestamp": "2025-09-10 02:29:24.713814", + "step": 6461, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:24.744724", + "step": 6461, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.002017835271545e-05, + "timestamp": "2025-09-10 02:29:24.748333", + "step": 6462, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:24.781362", + "step": 6462, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.268764551961794e-05, + "timestamp": "2025-09-10 02:29:24.783970", + "step": 6463, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:24.814060", + "step": 6463, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001622526760911569, + "timestamp": "2025-09-10 02:29:24.837871", + "step": 6464, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 13289167064320 + }, + "timestamp": "2025-09-10 02:29:24.877032", + "step": 6464, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0037606866098940372, + "timestamp": "2025-09-10 02:29:24.892897", + "step": 6465, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:29:24.927792", + "step": 6465, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015440176939591765, + "timestamp": "2025-09-10 02:29:24.941588", + "step": 6466, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:24.973250", + "step": 6466, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043152968282811344, + "timestamp": "2025-09-10 02:29:24.983279", + "step": 6467, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:25.014538", + "step": 6467, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006583757349289954, + "timestamp": "2025-09-10 02:29:25.045133", + "step": 6468, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:29:35.349732", + "step": 6468, + "epoch": 3 + }, + { + "type": "pplx", + "content": 22255779.46558232, + "timestamp": "2025-09-10 02:29:35.352759", + "step": 6468, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:35.384222", + "step": 6468, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009330078610219061, + "timestamp": "2025-09-10 02:29:35.388293", + "step": 6469, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:35.419665", + "step": 6469, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010204659774899483, + "timestamp": "2025-09-10 02:29:35.426735", + "step": 6470, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:35.459078", + "step": 6470, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023531516490038484, + "timestamp": "2025-09-10 02:29:35.465701", + "step": 6471, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:35.503012", + "step": 6471, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004967449931427836, + "timestamp": "2025-09-10 02:29:35.528638", + "step": 6472, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:35.568261", + "step": 6472, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005900642718188465, + "timestamp": "2025-09-10 02:29:35.572624", + "step": 6473, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:35.612783", + "step": 6473, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001301489828620106, + "timestamp": "2025-09-10 02:29:35.623386", + "step": 6474, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:35.658385", + "step": 6474, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002881655527744442, + "timestamp": "2025-09-10 02:29:35.663428", + "step": 6475, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:35.701857", + "step": 6475, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041221058927476406, + "timestamp": "2025-09-10 02:29:35.734473", + "step": 6476, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:35.780821", + "step": 6476, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002252735139336437, + "timestamp": "2025-09-10 02:29:35.785876", + "step": 6477, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:35.820594", + "step": 6477, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011001846723956987, + "timestamp": "2025-09-10 02:29:35.830222", + "step": 6478, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:35.862077", + "step": 6478, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007892417488619685, + "timestamp": "2025-09-10 02:29:35.865799", + "step": 6479, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:35.898683", + "step": 6479, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009268686175346375, + "timestamp": "2025-09-10 02:29:35.926101", + "step": 6480, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:35.961547", + "step": 6480, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002452400978654623, + "timestamp": "2025-09-10 02:29:35.969221", + "step": 6481, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:36.000580", + "step": 6481, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030889807385392487, + "timestamp": "2025-09-10 02:29:36.005020", + "step": 6482, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:36.045471", + "step": 6482, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015111747779883444, + "timestamp": "2025-09-10 02:29:36.052698", + "step": 6483, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:36.091401", + "step": 6483, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000214752959436737, + "timestamp": "2025-09-10 02:29:36.118935", + "step": 6484, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:36.151773", + "step": 6484, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003622962685767561, + "timestamp": "2025-09-10 02:29:36.156467", + "step": 6485, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:36.189541", + "step": 6485, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042898583342321217, + "timestamp": "2025-09-10 02:29:36.196999", + "step": 6486, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:36.232058", + "step": 6486, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003162114298902452, + "timestamp": "2025-09-10 02:29:36.244003", + "step": 6487, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:36.277950", + "step": 6487, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003182740474585444, + "timestamp": "2025-09-10 02:29:36.305280", + "step": 6488, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:36.339842", + "step": 6488, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012332449841778725, + "timestamp": "2025-09-10 02:29:36.345057", + "step": 6489, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:36.384063", + "step": 6489, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016331372899003327, + "timestamp": "2025-09-10 02:29:36.390704", + "step": 6490, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:29:36.427948", + "step": 6490, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025508024264127016, + "timestamp": "2025-09-10 02:29:36.441375", + "step": 6491, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:36.479851", + "step": 6491, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007555651245638728, + "timestamp": "2025-09-10 02:29:36.511641", + "step": 6492, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:36.546508", + "step": 6492, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002513106446713209, + "timestamp": "2025-09-10 02:29:36.550495", + "step": 6493, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:36.590373", + "step": 6493, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017626323096919805, + "timestamp": "2025-09-10 02:29:36.601330", + "step": 6494, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:29:36.643268", + "step": 6494, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001794335839804262, + "timestamp": "2025-09-10 02:29:36.657290", + "step": 6495, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:36.689244", + "step": 6495, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.488606206607074e-05, + "timestamp": "2025-09-10 02:29:36.720858", + "step": 6496, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:36.751619", + "step": 6496, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005462738336063921, + "timestamp": "2025-09-10 02:29:36.753944", + "step": 6497, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:29:36.792379", + "step": 6497, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012696681369561702, + "timestamp": "2025-09-10 02:29:36.807955", + "step": 6498, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:36.841494", + "step": 6498, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005157067789696157, + "timestamp": "2025-09-10 02:29:36.852298", + "step": 6499, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:36.883819", + "step": 6499, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005326425889506936, + "timestamp": "2025-09-10 02:29:36.907923", + "step": 6500, + "epoch": 3 + }, + { + "type": "info", + "content": "Checkpoint saved at step 6500", + "timestamp": "2025-09-10 02:29:41.632309", + "step": 6500, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:41.664361", + "step": 6500, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022441011969931424, + "timestamp": "2025-09-10 02:29:41.668364", + "step": 6501, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:29:41.699351", + "step": 6501, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002523001458030194, + "timestamp": "2025-09-10 02:29:41.701189", + "step": 6502, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:41.732711", + "step": 6502, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.853170346701518e-05, + "timestamp": "2025-09-10 02:29:41.739497", + "step": 6503, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:41.769971", + "step": 6503, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001199183170683682, + "timestamp": "2025-09-10 02:29:41.794729", + "step": 6504, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:29:41.831803", + "step": 6504, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005333385779522359, + "timestamp": "2025-09-10 02:29:41.846885", + "step": 6505, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:41.877696", + "step": 6505, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003356721135787666, + "timestamp": "2025-09-10 02:29:41.887991", + "step": 6506, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:29:41.930725", + "step": 6506, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004762968164868653, + "timestamp": "2025-09-10 02:29:41.944737", + "step": 6507, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:41.977144", + "step": 6507, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0037521845661103725, + "timestamp": "2025-09-10 02:29:42.005092", + "step": 6508, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:42.039853", + "step": 6508, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003358405956532806, + "timestamp": "2025-09-10 02:29:42.043031", + "step": 6509, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:42.078894", + "step": 6509, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010202966223005205, + "timestamp": "2025-09-10 02:29:42.081544", + "step": 6510, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:42.112909", + "step": 6510, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.8475307014305145e-05, + "timestamp": "2025-09-10 02:29:42.117315", + "step": 6511, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:42.148295", + "step": 6511, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015812184428796172, + "timestamp": "2025-09-10 02:29:42.176703", + "step": 6512, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:42.209147", + "step": 6512, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000630614347755909, + "timestamp": "2025-09-10 02:29:42.213683", + "step": 6513, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:42.246530", + "step": 6513, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002638591453433037, + "timestamp": "2025-09-10 02:29:42.258549", + "step": 6514, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:29:42.292876", + "step": 6514, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0018498777644708753, + "timestamp": "2025-09-10 02:29:42.306207", + "step": 6515, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:29:42.345000", + "step": 6515, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014997952384874225, + "timestamp": "2025-09-10 02:29:42.379266", + "step": 6516, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:42.423451", + "step": 6516, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.7881530665326864e-05, + "timestamp": "2025-09-10 02:29:42.426209", + "step": 6517, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:42.460619", + "step": 6517, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023194043897092342, + "timestamp": "2025-09-10 02:29:42.464969", + "step": 6518, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:42.495158", + "step": 6518, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002513010986149311, + "timestamp": "2025-09-10 02:29:42.499820", + "step": 6519, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:42.530931", + "step": 6519, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.767297888174653e-05, + "timestamp": "2025-09-10 02:29:42.559204", + "step": 6520, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:29:42.592773", + "step": 6520, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012455906253308058, + "timestamp": "2025-09-10 02:29:42.605309", + "step": 6521, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:42.638972", + "step": 6521, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012123944179620594, + "timestamp": "2025-09-10 02:29:42.644016", + "step": 6522, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:42.700829", + "step": 6522, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010264220327371731, + "timestamp": "2025-09-10 02:29:42.711735", + "step": 6523, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:42.743875", + "step": 6523, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013027484237682074, + "timestamp": "2025-09-10 02:29:42.774849", + "step": 6524, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:42.829251", + "step": 6524, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.051839955034666e-05, + "timestamp": "2025-09-10 02:29:42.830918", + "step": 6525, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:42.863231", + "step": 6525, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017694670532364398, + "timestamp": "2025-09-10 02:29:42.875174", + "step": 6526, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:42.911744", + "step": 6526, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001635013904888183, + "timestamp": "2025-09-10 02:29:42.918386", + "step": 6527, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:42.951115", + "step": 6527, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000155866306158714, + "timestamp": "2025-09-10 02:29:42.981872", + "step": 6528, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:43.018101", + "step": 6528, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020132049394305795, + "timestamp": "2025-09-10 02:29:43.027472", + "step": 6529, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:29:43.059876", + "step": 6529, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001669298653723672, + "timestamp": "2025-09-10 02:29:43.064357", + "step": 6530, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:29:43.106956", + "step": 6530, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.987391968024895e-05, + "timestamp": "2025-09-10 02:29:43.120572", + "step": 6531, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:43.161837", + "step": 6531, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.64821855025366e-05, + "timestamp": "2025-09-10 02:29:43.192643", + "step": 6532, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:43.233736", + "step": 6532, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00035039763315580785, + "timestamp": "2025-09-10 02:29:43.238128", + "step": 6533, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:43.275737", + "step": 6533, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019631556642707437, + "timestamp": "2025-09-10 02:29:43.286331", + "step": 6534, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:43.321424", + "step": 6534, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004502573050558567, + "timestamp": "2025-09-10 02:29:43.328363", + "step": 6535, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:29:43.367093", + "step": 6535, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.936825179262087e-05, + "timestamp": "2025-09-10 02:29:43.401939", + "step": 6536, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:43.444500", + "step": 6536, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012994735734537244, + "timestamp": "2025-09-10 02:29:43.450004", + "step": 6537, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:43.488581", + "step": 6537, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024626238155178726, + "timestamp": "2025-09-10 02:29:43.500954", + "step": 6538, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:43.537888", + "step": 6538, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0011256147408857942, + "timestamp": "2025-09-10 02:29:43.544647", + "step": 6539, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:43.582021", + "step": 6539, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003471332311164588, + "timestamp": "2025-09-10 02:29:43.610260", + "step": 6540, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:43.648185", + "step": 6540, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005624201148748398, + "timestamp": "2025-09-10 02:29:43.652530", + "step": 6541, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:29:43.685826", + "step": 6541, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000520360074006021, + "timestamp": "2025-09-10 02:29:43.688755", + "step": 6542, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:43.728439", + "step": 6542, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008386308327317238, + "timestamp": "2025-09-10 02:29:43.735366", + "step": 6543, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:43.775439", + "step": 6543, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001823129568947479, + "timestamp": "2025-09-10 02:29:43.806352", + "step": 6544, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:43.839287", + "step": 6544, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.192834265064448e-05, + "timestamp": "2025-09-10 02:29:43.843723", + "step": 6545, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:43.875102", + "step": 6545, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004066001274622977, + "timestamp": "2025-09-10 02:29:43.879468", + "step": 6546, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:43.911348", + "step": 6546, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005676894914358854, + "timestamp": "2025-09-10 02:29:43.915314", + "step": 6547, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:43.949276", + "step": 6547, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017382523510605097, + "timestamp": "2025-09-10 02:29:43.977822", + "step": 6548, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:44.008971", + "step": 6548, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020217923156451434, + "timestamp": "2025-09-10 02:29:44.014497", + "step": 6549, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:44.047133", + "step": 6549, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010007356759160757, + "timestamp": "2025-09-10 02:29:44.057103", + "step": 6550, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:44.093005", + "step": 6550, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002772933221422136, + "timestamp": "2025-09-10 02:29:44.100364", + "step": 6551, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:44.132702", + "step": 6551, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.65510301082395e-05, + "timestamp": "2025-09-10 02:29:44.165536", + "step": 6552, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:44.199627", + "step": 6552, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012201262870803475, + "timestamp": "2025-09-10 02:29:44.207311", + "step": 6553, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:44.241883", + "step": 6553, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.7427620807429776e-05, + "timestamp": "2025-09-10 02:29:44.253642", + "step": 6554, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:44.289381", + "step": 6554, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004760746378451586, + "timestamp": "2025-09-10 02:29:44.293433", + "step": 6555, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:44.326167", + "step": 6555, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.913670247537084e-05, + "timestamp": "2025-09-10 02:29:44.354407", + "step": 6556, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:44.384952", + "step": 6556, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003855906252283603, + "timestamp": "2025-09-10 02:29:44.390366", + "step": 6557, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:44.421919", + "step": 6557, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006527866353280842, + "timestamp": "2025-09-10 02:29:44.429692", + "step": 6558, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:44.460396", + "step": 6558, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004126446438021958, + "timestamp": "2025-09-10 02:29:44.467105", + "step": 6559, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:44.499030", + "step": 6559, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010248164471704513, + "timestamp": "2025-09-10 02:29:44.527617", + "step": 6560, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:44.582600", + "step": 6560, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.3595810489496216e-05, + "timestamp": "2025-09-10 02:29:44.590845", + "step": 6561, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:29:44.647250", + "step": 6561, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.688699143822305e-05, + "timestamp": "2025-09-10 02:29:44.660980", + "step": 6562, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:44.699999", + "step": 6562, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012046539632137865, + "timestamp": "2025-09-10 02:29:44.706900", + "step": 6563, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:44.742152", + "step": 6563, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.20097081689164e-05, + "timestamp": "2025-09-10 02:29:44.778567", + "step": 6564, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:44.867801", + "step": 6564, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.921749056549743e-05, + "timestamp": "2025-09-10 02:29:44.872821", + "step": 6565, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:29:44.939179", + "step": 6565, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0032475763000547886, + "timestamp": "2025-09-10 02:29:44.955024", + "step": 6566, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 848 + ], + "flops": 25154260214720 + }, + "timestamp": "2025-09-10 02:29:45.056353", + "step": 6566, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.683669875608757e-05, + "timestamp": "2025-09-10 02:29:45.085999", + "step": 6567, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:45.121927", + "step": 6567, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.070793799357489e-05, + "timestamp": "2025-09-10 02:29:45.155401", + "step": 6568, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:45.186619", + "step": 6568, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010982885723933578, + "timestamp": "2025-09-10 02:29:45.189689", + "step": 6569, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:45.223202", + "step": 6569, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.731281948508695e-05, + "timestamp": "2025-09-10 02:29:45.227551", + "step": 6570, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:45.259253", + "step": 6570, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015924021136015654, + "timestamp": "2025-09-10 02:29:45.269427", + "step": 6571, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:45.306151", + "step": 6571, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023259581939782947, + "timestamp": "2025-09-10 02:29:45.333773", + "step": 6572, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:29:45.367653", + "step": 6572, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009659113129600883, + "timestamp": "2025-09-10 02:29:45.380672", + "step": 6573, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:29:45.416456", + "step": 6573, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002564275055192411, + "timestamp": "2025-09-10 02:29:45.430411", + "step": 6574, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:45.462075", + "step": 6574, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.740082557778805e-05, + "timestamp": "2025-09-10 02:29:45.466099", + "step": 6575, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 15187581968384 + }, + "timestamp": "2025-09-10 02:29:45.509626", + "step": 6575, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016587102436460555, + "timestamp": "2025-09-10 02:29:45.548209", + "step": 6576, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:29:45.591561", + "step": 6576, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008845412521623075, + "timestamp": "2025-09-10 02:29:45.607247", + "step": 6577, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:45.645434", + "step": 6577, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.017135100439190865, + "timestamp": "2025-09-10 02:29:45.649484", + "step": 6578, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:45.685410", + "step": 6578, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005552711896598339, + "timestamp": "2025-09-10 02:29:45.697410", + "step": 6579, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:45.730471", + "step": 6579, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.742481516790576e-05, + "timestamp": "2025-09-10 02:29:45.766625", + "step": 6580, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:45.802511", + "step": 6580, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011518421524669975, + "timestamp": "2025-09-10 02:29:45.810931", + "step": 6581, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:45.844646", + "step": 6581, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02741926722228527, + "timestamp": "2025-09-10 02:29:45.855494", + "step": 6582, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:45.888579", + "step": 6582, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002638068108353764, + "timestamp": "2025-09-10 02:29:45.895389", + "step": 6583, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:29:45.934941", + "step": 6583, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.82187344157137e-05, + "timestamp": "2025-09-10 02:29:45.969171", + "step": 6584, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:46.009699", + "step": 6584, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043393290252424777, + "timestamp": "2025-09-10 02:29:46.014232", + "step": 6585, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:46.054569", + "step": 6585, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005896832444705069, + "timestamp": "2025-09-10 02:29:46.063670", + "step": 6586, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:46.097949", + "step": 6586, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.4004228180274367e-05, + "timestamp": "2025-09-10 02:29:46.104779", + "step": 6587, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:29:46.143132", + "step": 6587, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.16260190831963e-05, + "timestamp": "2025-09-10 02:29:46.167051", + "step": 6588, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:29:46.202964", + "step": 6588, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.08385879546403885, + "timestamp": "2025-09-10 02:29:46.218140", + "step": 6589, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:46.253903", + "step": 6589, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.120770245208405e-05, + "timestamp": "2025-09-10 02:29:46.258184", + "step": 6590, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:46.292173", + "step": 6590, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004548307042568922, + "timestamp": "2025-09-10 02:29:46.304343", + "step": 6591, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:46.337290", + "step": 6591, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008054524078033864, + "timestamp": "2025-09-10 02:29:46.367624", + "step": 6592, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:46.403516", + "step": 6592, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000291215896140784, + "timestamp": "2025-09-10 02:29:46.407847", + "step": 6593, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:29:46.446194", + "step": 6593, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.139638854714576e-05, + "timestamp": "2025-09-10 02:29:46.461869", + "step": 6594, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:46.494685", + "step": 6594, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.8166157790110447e-05, + "timestamp": "2025-09-10 02:29:46.502312", + "step": 6595, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:46.533636", + "step": 6595, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0057443794794380665, + "timestamp": "2025-09-10 02:29:46.561742", + "step": 6596, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:29:46.595237", + "step": 6596, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.872858208836988e-05, + "timestamp": "2025-09-10 02:29:46.608219", + "step": 6597, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:46.639652", + "step": 6597, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032129278406500816, + "timestamp": "2025-09-10 02:29:46.646828", + "step": 6598, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:46.678399", + "step": 6598, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.182164386496879e-05, + "timestamp": "2025-09-10 02:29:46.685105", + "step": 6599, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:46.720441", + "step": 6599, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004248176119290292, + "timestamp": "2025-09-10 02:29:46.753685", + "step": 6600, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:46.793074", + "step": 6600, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.2954368584323674e-05, + "timestamp": "2025-09-10 02:29:46.797228", + "step": 6601, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:46.828902", + "step": 6601, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.363394641084597e-05, + "timestamp": "2025-09-10 02:29:46.835986", + "step": 6602, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:46.871029", + "step": 6602, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003344352007843554, + "timestamp": "2025-09-10 02:29:46.878236", + "step": 6603, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:46.912555", + "step": 6603, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003345024597365409, + "timestamp": "2025-09-10 02:29:46.940194", + "step": 6604, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:46.975062", + "step": 6604, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013073600712232292, + "timestamp": "2025-09-10 02:29:46.980181", + "step": 6605, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:47.011831", + "step": 6605, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012158307799836621, + "timestamp": "2025-09-10 02:29:47.018238", + "step": 6606, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:47.050475", + "step": 6606, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014575115346815437, + "timestamp": "2025-09-10 02:29:47.059974", + "step": 6607, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 19459015502528 + }, + "timestamp": "2025-09-10 02:29:47.115534", + "step": 6607, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006869042408652604, + "timestamp": "2025-09-10 02:29:47.159829", + "step": 6608, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:47.192433", + "step": 6608, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005232971161603928, + "timestamp": "2025-09-10 02:29:47.200103", + "step": 6609, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:47.231788", + "step": 6609, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002385809930274263, + "timestamp": "2025-09-10 02:29:47.238211", + "step": 6610, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:29:47.278395", + "step": 6610, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031182175735011697, + "timestamp": "2025-09-10 02:29:47.294275", + "step": 6611, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:47.325939", + "step": 6611, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.482206223765388e-05, + "timestamp": "2025-09-10 02:29:47.353366", + "step": 6612, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:47.387214", + "step": 6612, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003503487096168101, + "timestamp": "2025-09-10 02:29:47.392229", + "step": 6613, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:47.423378", + "step": 6613, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023075289209373295, + "timestamp": "2025-09-10 02:29:47.435195", + "step": 6614, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:29:47.470337", + "step": 6614, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.644097786396742e-05, + "timestamp": "2025-09-10 02:29:47.484129", + "step": 6615, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:29:57.697352", + "step": 6615, + "epoch": 3 + }, + { + "type": "pplx", + "content": 23155743.853774708, + "timestamp": "2025-09-10 02:29:57.700483", + "step": 6615, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:57.731870", + "step": 6615, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013089847925584763, + "timestamp": "2025-09-10 02:29:57.763239", + "step": 6616, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:57.795019", + "step": 6616, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008317952044308186, + "timestamp": "2025-09-10 02:29:57.802667", + "step": 6617, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:29:57.836916", + "step": 6617, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014241410826798528, + "timestamp": "2025-09-10 02:29:57.850624", + "step": 6618, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:57.882574", + "step": 6618, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004595229693222791, + "timestamp": "2025-09-10 02:29:57.890144", + "step": 6619, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:57.920736", + "step": 6619, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002367516717640683, + "timestamp": "2025-09-10 02:29:57.945896", + "step": 6620, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:57.976938", + "step": 6620, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.231966057792306e-05, + "timestamp": "2025-09-10 02:29:57.987433", + "step": 6621, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:58.018056", + "step": 6621, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013666613085661083, + "timestamp": "2025-09-10 02:29:58.028280", + "step": 6622, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:29:58.058965", + "step": 6622, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022755752434022725, + "timestamp": "2025-09-10 02:29:58.061395", + "step": 6623, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:58.092174", + "step": 6623, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.438480391399935e-05, + "timestamp": "2025-09-10 02:29:58.120015", + "step": 6624, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:58.150543", + "step": 6624, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011097739479737356, + "timestamp": "2025-09-10 02:29:58.158399", + "step": 6625, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:29:58.191879", + "step": 6625, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.087979636504315e-05, + "timestamp": "2025-09-10 02:29:58.205310", + "step": 6626, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:58.236157", + "step": 6626, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001310189691139385, + "timestamp": "2025-09-10 02:29:58.243858", + "step": 6627, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:58.273884", + "step": 6627, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006077897851355374, + "timestamp": "2025-09-10 02:29:58.300002", + "step": 6628, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:58.330536", + "step": 6628, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00032031405135057867, + "timestamp": "2025-09-10 02:29:58.335086", + "step": 6629, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:29:58.369661", + "step": 6629, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019072220311500132, + "timestamp": "2025-09-10 02:29:58.383352", + "step": 6630, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:58.416056", + "step": 6630, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016121887892950326, + "timestamp": "2025-09-10 02:29:58.420014", + "step": 6631, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:58.450592", + "step": 6631, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014108339382801205, + "timestamp": "2025-09-10 02:29:58.478324", + "step": 6632, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:58.510726", + "step": 6632, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00026426606927998364, + "timestamp": "2025-09-10 02:29:58.518657", + "step": 6633, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:58.549418", + "step": 6633, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017428163846489042, + "timestamp": "2025-09-10 02:29:58.556254", + "step": 6634, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:58.587457", + "step": 6634, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.75916045717895e-05, + "timestamp": "2025-09-10 02:29:58.595369", + "step": 6635, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:29:58.630012", + "step": 6635, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006233084131963551, + "timestamp": "2025-09-10 02:29:58.664915", + "step": 6636, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:58.695685", + "step": 6636, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.04499921575188637, + "timestamp": "2025-09-10 02:29:58.700709", + "step": 6637, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:29:58.731676", + "step": 6637, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001879000337794423, + "timestamp": "2025-09-10 02:29:58.734198", + "step": 6638, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:58.765793", + "step": 6638, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037380700814537704, + "timestamp": "2025-09-10 02:29:58.777909", + "step": 6639, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:58.810035", + "step": 6639, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000758981506805867, + "timestamp": "2025-09-10 02:29:58.835096", + "step": 6640, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:58.866403", + "step": 6640, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025593198370188475, + "timestamp": "2025-09-10 02:29:58.869376", + "step": 6641, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:58.902071", + "step": 6641, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043907135841436684, + "timestamp": "2025-09-10 02:29:58.906026", + "step": 6642, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:58.939048", + "step": 6642, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006986635853536427, + "timestamp": "2025-09-10 02:29:58.951100", + "step": 6643, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:29:58.982239", + "step": 6643, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012294725456740707, + "timestamp": "2025-09-10 02:29:59.015713", + "step": 6644, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:59.048251", + "step": 6644, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016030111873988062, + "timestamp": "2025-09-10 02:29:59.056301", + "step": 6645, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:29:59.087486", + "step": 6645, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019082374637946486, + "timestamp": "2025-09-10 02:29:59.098455", + "step": 6646, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:59.130766", + "step": 6646, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003217429621145129, + "timestamp": "2025-09-10 02:29:59.141083", + "step": 6647, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:59.171745", + "step": 6647, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001194212309201248, + "timestamp": "2025-09-10 02:29:59.202813", + "step": 6648, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:59.233711", + "step": 6648, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015604333020746708, + "timestamp": "2025-09-10 02:29:59.238488", + "step": 6649, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:59.270048", + "step": 6649, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003292102483101189, + "timestamp": "2025-09-10 02:29:59.277806", + "step": 6650, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:59.311348", + "step": 6650, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001702476729406044, + "timestamp": "2025-09-10 02:29:59.323584", + "step": 6651, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:59.353991", + "step": 6651, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.551568316761404e-05, + "timestamp": "2025-09-10 02:29:59.387131", + "step": 6652, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:59.417958", + "step": 6652, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008532029460184276, + "timestamp": "2025-09-10 02:29:59.422673", + "step": 6653, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:59.454139", + "step": 6653, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002605307847261429, + "timestamp": "2025-09-10 02:29:59.461242", + "step": 6654, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:59.492600", + "step": 6654, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.880874909460545e-05, + "timestamp": "2025-09-10 02:29:59.499552", + "step": 6655, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:29:59.530925", + "step": 6655, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003353517968207598, + "timestamp": "2025-09-10 02:29:59.558839", + "step": 6656, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:59.590568", + "step": 6656, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013173728075344115, + "timestamp": "2025-09-10 02:29:59.595098", + "step": 6657, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:59.625524", + "step": 6657, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001572458859300241, + "timestamp": "2025-09-10 02:29:59.635684", + "step": 6658, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:59.667141", + "step": 6658, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001322474709013477, + "timestamp": "2025-09-10 02:29:59.674006", + "step": 6659, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:29:59.705246", + "step": 6659, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003668780846055597, + "timestamp": "2025-09-10 02:29:59.736437", + "step": 6660, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:29:59.767777", + "step": 6660, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014766550157219172, + "timestamp": "2025-09-10 02:29:59.777501", + "step": 6661, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:29:59.808844", + "step": 6661, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010263031435897574, + "timestamp": "2025-09-10 02:29:59.816341", + "step": 6662, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:29:59.846707", + "step": 6662, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016308830527123064, + "timestamp": "2025-09-10 02:29:59.850690", + "step": 6663, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:29:59.880950", + "step": 6663, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017855261103250086, + "timestamp": "2025-09-10 02:29:59.908755", + "step": 6664, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:29:59.940347", + "step": 6664, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002598909428343177, + "timestamp": "2025-09-10 02:29:59.942693", + "step": 6665, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:29:59.972688", + "step": 6665, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.03273782879114151, + "timestamp": "2025-09-10 02:29:59.980415", + "step": 6666, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:00.011943", + "step": 6666, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009771647164598107, + "timestamp": "2025-09-10 02:30:00.022056", + "step": 6667, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:00.053644", + "step": 6667, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001550656888866797, + "timestamp": "2025-09-10 02:30:00.081954", + "step": 6668, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:00.115747", + "step": 6668, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.419160490622744e-05, + "timestamp": "2025-09-10 02:30:00.123685", + "step": 6669, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:00.155047", + "step": 6669, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010343643371015787, + "timestamp": "2025-09-10 02:30:00.162649", + "step": 6670, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:30:00.197090", + "step": 6670, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004840478941332549, + "timestamp": "2025-09-10 02:30:00.210903", + "step": 6671, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:00.246281", + "step": 6671, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007365471683442593, + "timestamp": "2025-09-10 02:30:00.280494", + "step": 6672, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:00.312642", + "step": 6672, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023997330572456121, + "timestamp": "2025-09-10 02:30:00.318091", + "step": 6673, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:00.349431", + "step": 6673, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003005491744261235, + "timestamp": "2025-09-10 02:30:00.361669", + "step": 6674, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:00.392866", + "step": 6674, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004848405660595745, + "timestamp": "2025-09-10 02:30:00.399901", + "step": 6675, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:00.431202", + "step": 6675, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005252750124782324, + "timestamp": "2025-09-10 02:30:00.459830", + "step": 6676, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:00.491167", + "step": 6676, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00031378321000374854, + "timestamp": "2025-09-10 02:30:00.495509", + "step": 6677, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:00.527111", + "step": 6677, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002828229626175016, + "timestamp": "2025-09-10 02:30:00.537098", + "step": 6678, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:00.568785", + "step": 6678, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015684754180256277, + "timestamp": "2025-09-10 02:30:00.576183", + "step": 6679, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:00.606885", + "step": 6679, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003511524701025337, + "timestamp": "2025-09-10 02:30:00.630735", + "step": 6680, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:30:00.662482", + "step": 6680, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010571930761216208, + "timestamp": "2025-09-10 02:30:00.672718", + "step": 6681, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:00.705496", + "step": 6681, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010457936878083274, + "timestamp": "2025-09-10 02:30:00.709653", + "step": 6682, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:00.740382", + "step": 6682, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036585141788236797, + "timestamp": "2025-09-10 02:30:00.747032", + "step": 6683, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:00.782833", + "step": 6683, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012327669537626207, + "timestamp": "2025-09-10 02:30:00.810543", + "step": 6684, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:00.841504", + "step": 6684, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021850709163118154, + "timestamp": "2025-09-10 02:30:00.844142", + "step": 6685, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:00.875302", + "step": 6685, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018914879183284938, + "timestamp": "2025-09-10 02:30:00.887630", + "step": 6686, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:30:00.918893", + "step": 6686, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001374257553834468, + "timestamp": "2025-09-10 02:30:00.921422", + "step": 6687, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:30:00.957894", + "step": 6687, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024864732404239476, + "timestamp": "2025-09-10 02:30:00.992738", + "step": 6688, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:01.024373", + "step": 6688, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.009503054432570934, + "timestamp": "2025-09-10 02:30:01.028078", + "step": 6689, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:01.063225", + "step": 6689, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002550124190747738, + "timestamp": "2025-09-10 02:30:01.070513", + "step": 6690, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 432 + ], + "flops": 12814563338304 + }, + "timestamp": "2025-09-10 02:30:01.109691", + "step": 6690, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004109316796530038, + "timestamp": "2025-09-10 02:30:01.125868", + "step": 6691, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:01.157073", + "step": 6691, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036797040957026184, + "timestamp": "2025-09-10 02:30:01.185883", + "step": 6692, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:01.216983", + "step": 6692, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011126509343739599, + "timestamp": "2025-09-10 02:30:01.224967", + "step": 6693, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:01.257817", + "step": 6693, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.454932347172871e-05, + "timestamp": "2025-09-10 02:30:01.265509", + "step": 6694, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:01.296459", + "step": 6694, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021754649060312659, + "timestamp": "2025-09-10 02:30:01.303768", + "step": 6695, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:01.334621", + "step": 6695, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014526637096423656, + "timestamp": "2025-09-10 02:30:01.358375", + "step": 6696, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 13289167064320 + }, + "timestamp": "2025-09-10 02:30:01.396064", + "step": 6696, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002652324619702995, + "timestamp": "2025-09-10 02:30:01.411973", + "step": 6697, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:01.443004", + "step": 6697, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011327861284371465, + "timestamp": "2025-09-10 02:30:01.449826", + "step": 6698, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:01.481727", + "step": 6698, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00028877213480882347, + "timestamp": "2025-09-10 02:30:01.489092", + "step": 6699, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:01.520717", + "step": 6699, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022058105969335884, + "timestamp": "2025-09-10 02:30:01.546093", + "step": 6700, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:01.577682", + "step": 6700, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002654526033438742, + "timestamp": "2025-09-10 02:30:01.582188", + "step": 6701, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:01.615409", + "step": 6701, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038097609649412334, + "timestamp": "2025-09-10 02:30:01.622347", + "step": 6702, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:01.653294", + "step": 6702, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023465848062187433, + "timestamp": "2025-09-10 02:30:01.660185", + "step": 6703, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:01.692737", + "step": 6703, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016869421233423054, + "timestamp": "2025-09-10 02:30:01.723948", + "step": 6704, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:01.755326", + "step": 6704, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002982628939207643, + "timestamp": "2025-09-10 02:30:01.760029", + "step": 6705, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:30:01.796033", + "step": 6705, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042490853229537606, + "timestamp": "2025-09-10 02:30:01.808562", + "step": 6706, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:01.842911", + "step": 6706, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036351257585920393, + "timestamp": "2025-09-10 02:30:01.847262", + "step": 6707, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:01.878290", + "step": 6707, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016362879250664264, + "timestamp": "2025-09-10 02:30:01.906042", + "step": 6708, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:01.937236", + "step": 6708, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014777052274439484, + "timestamp": "2025-09-10 02:30:01.942279", + "step": 6709, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:01.973464", + "step": 6709, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013149731967132539, + "timestamp": "2025-09-10 02:30:01.979403", + "step": 6710, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:02.020841", + "step": 6710, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.87174644251354e-05, + "timestamp": "2025-09-10 02:30:02.028460", + "step": 6711, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:02.059923", + "step": 6711, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.164093353319913e-05, + "timestamp": "2025-09-10 02:30:02.097837", + "step": 6712, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:02.163435", + "step": 6712, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.91975731367711e-05, + "timestamp": "2025-09-10 02:30:02.171839", + "step": 6713, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:02.215126", + "step": 6713, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025522714713588357, + "timestamp": "2025-09-10 02:30:02.227085", + "step": 6714, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:30:02.266867", + "step": 6714, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016006106743589044, + "timestamp": "2025-09-10 02:30:02.279452", + "step": 6715, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:02.321903", + "step": 6715, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.307100946083665e-05, + "timestamp": "2025-09-10 02:30:02.346711", + "step": 6716, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:02.381968", + "step": 6716, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.852695802692324e-05, + "timestamp": "2025-09-10 02:30:02.384511", + "step": 6717, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:02.418468", + "step": 6717, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003025185433216393, + "timestamp": "2025-09-10 02:30:02.425355", + "step": 6718, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:30:02.456597", + "step": 6718, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015510991215705872, + "timestamp": "2025-09-10 02:30:02.469113", + "step": 6719, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:02.500955", + "step": 6719, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001259516429854557, + "timestamp": "2025-09-10 02:30:02.528547", + "step": 6720, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:02.559964", + "step": 6720, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008776256581768394, + "timestamp": "2025-09-10 02:30:02.562312", + "step": 6721, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:02.594792", + "step": 6721, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.6968630310148e-05, + "timestamp": "2025-09-10 02:30:02.605651", + "step": 6722, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:02.638245", + "step": 6722, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000490417645778507, + "timestamp": "2025-09-10 02:30:02.648085", + "step": 6723, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:02.679494", + "step": 6723, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0023274854756891727, + "timestamp": "2025-09-10 02:30:02.708135", + "step": 6724, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:30:02.738780", + "step": 6724, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.722565351286903e-05, + "timestamp": "2025-09-10 02:30:02.741949", + "step": 6725, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:02.775968", + "step": 6725, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001424977817805484, + "timestamp": "2025-09-10 02:30:02.782594", + "step": 6726, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:02.814355", + "step": 6726, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002022543194470927, + "timestamp": "2025-09-10 02:30:02.820951", + "step": 6727, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:02.852914", + "step": 6727, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003710582968778908, + "timestamp": "2025-09-10 02:30:02.876861", + "step": 6728, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:02.909251", + "step": 6728, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013393805420491844, + "timestamp": "2025-09-10 02:30:02.911317", + "step": 6729, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:02.942365", + "step": 6729, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025334549718536437, + "timestamp": "2025-09-10 02:30:02.949941", + "step": 6730, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:02.981314", + "step": 6730, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015375103102996945, + "timestamp": "2025-09-10 02:30:02.985634", + "step": 6731, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:03.016998", + "step": 6731, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00120832200627774, + "timestamp": "2025-09-10 02:30:03.045410", + "step": 6732, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:03.078789", + "step": 6732, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019324652384966612, + "timestamp": "2025-09-10 02:30:03.081264", + "step": 6733, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:03.112557", + "step": 6733, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00045514092198573053, + "timestamp": "2025-09-10 02:30:03.115146", + "step": 6734, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:03.146534", + "step": 6734, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002553352096583694, + "timestamp": "2025-09-10 02:30:03.157281", + "step": 6735, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:30:03.192725", + "step": 6735, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010565890261204913, + "timestamp": "2025-09-10 02:30:03.227326", + "step": 6736, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:30:03.262594", + "step": 6736, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019748820341192186, + "timestamp": "2025-09-10 02:30:03.275653", + "step": 6737, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:03.308506", + "step": 6737, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015901295410003513, + "timestamp": "2025-09-10 02:30:03.315595", + "step": 6738, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:30:03.356029", + "step": 6738, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040654095937497914, + "timestamp": "2025-09-10 02:30:03.369737", + "step": 6739, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:03.401756", + "step": 6739, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.327982893912122e-05, + "timestamp": "2025-09-10 02:30:03.430063", + "step": 6740, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:03.465677", + "step": 6740, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001900206902064383, + "timestamp": "2025-09-10 02:30:03.472042", + "step": 6741, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:03.509158", + "step": 6741, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012812459317501634, + "timestamp": "2025-09-10 02:30:03.516098", + "step": 6742, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:03.547366", + "step": 6742, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014215106784831733, + "timestamp": "2025-09-10 02:30:03.551339", + "step": 6743, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:03.583264", + "step": 6743, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015154675347730517, + "timestamp": "2025-09-10 02:30:03.608006", + "step": 6744, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:03.639794", + "step": 6744, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00048416477511636913, + "timestamp": "2025-09-10 02:30:03.645121", + "step": 6745, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:03.680280", + "step": 6745, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0016691208584234118, + "timestamp": "2025-09-10 02:30:03.693642", + "step": 6746, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:03.726243", + "step": 6746, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012729717418551445, + "timestamp": "2025-09-10 02:30:03.733692", + "step": 6747, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:30:03.766399", + "step": 6747, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.610819400520995e-05, + "timestamp": "2025-09-10 02:30:03.799709", + "step": 6748, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:03.831631", + "step": 6748, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004302055749576539, + "timestamp": "2025-09-10 02:30:03.836191", + "step": 6749, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 15662185694400 + }, + "timestamp": "2025-09-10 02:30:03.882183", + "step": 6749, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018864336016122252, + "timestamp": "2025-09-10 02:30:03.901351", + "step": 6750, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:03.933412", + "step": 6750, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017309685063082725, + "timestamp": "2025-09-10 02:30:03.940633", + "step": 6751, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:03.974347", + "step": 6751, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.622447835979983e-05, + "timestamp": "2025-09-10 02:30:04.008600", + "step": 6752, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:04.039796", + "step": 6752, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003488397051114589, + "timestamp": "2025-09-10 02:30:04.044117", + "step": 6753, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:04.076402", + "step": 6753, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013236506492830813, + "timestamp": "2025-09-10 02:30:04.083647", + "step": 6754, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:04.115194", + "step": 6754, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018496920529287308, + "timestamp": "2025-09-10 02:30:04.122414", + "step": 6755, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:30:04.157677", + "step": 6755, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016140654042828828, + "timestamp": "2025-09-10 02:30:04.192428", + "step": 6756, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:04.224380", + "step": 6756, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.932354153832421e-05, + "timestamp": "2025-09-10 02:30:04.228472", + "step": 6757, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:30:04.268473", + "step": 6757, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006969981477595866, + "timestamp": "2025-09-10 02:30:04.284320", + "step": 6758, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:04.320015", + "step": 6758, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.715174822602421e-05, + "timestamp": "2025-09-10 02:30:04.333442", + "step": 6759, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:04.366619", + "step": 6759, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00027297786436975, + "timestamp": "2025-09-10 02:30:04.392481", + "step": 6760, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:04.422941", + "step": 6760, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.975103001925163e-05, + "timestamp": "2025-09-10 02:30:04.425156", + "step": 6761, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:04.456491", + "step": 6761, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024496050900779665, + "timestamp": "2025-09-10 02:30:04.466667", + "step": 6762, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:30:15.292289", + "step": 6762, + "epoch": 3 + }, + { + "type": "pplx", + "content": 23855258.0813289, + "timestamp": "2025-09-10 02:30:15.295377", + "step": 6762, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:15.326564", + "step": 6762, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013265803863760084, + "timestamp": "2025-09-10 02:30:15.329741", + "step": 6763, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:15.364987", + "step": 6763, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006455808761529624, + "timestamp": "2025-09-10 02:30:15.393246", + "step": 6764, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:15.425460", + "step": 6764, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010767403291538358, + "timestamp": "2025-09-10 02:30:15.438173", + "step": 6765, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:15.469805", + "step": 6765, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016831964603625238, + "timestamp": "2025-09-10 02:30:15.477267", + "step": 6766, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:15.540508", + "step": 6766, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.969191210577264e-05, + "timestamp": "2025-09-10 02:30:15.547193", + "step": 6767, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:15.602576", + "step": 6767, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.008119060657918453, + "timestamp": "2025-09-10 02:30:15.634471", + "step": 6768, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:15.686556", + "step": 6768, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025774035602808, + "timestamp": "2025-09-10 02:30:15.690953", + "step": 6769, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:15.732130", + "step": 6769, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003182920045219362, + "timestamp": "2025-09-10 02:30:15.738838", + "step": 6770, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:15.772239", + "step": 6770, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016758790297899395, + "timestamp": "2025-09-10 02:30:15.784544", + "step": 6771, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:15.815462", + "step": 6771, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013579537626355886, + "timestamp": "2025-09-10 02:30:15.846516", + "step": 6772, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:15.881090", + "step": 6772, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003011829103343189, + "timestamp": "2025-09-10 02:30:15.888746", + "step": 6773, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:15.922440", + "step": 6773, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005984340095892549, + "timestamp": "2025-09-10 02:30:15.935758", + "step": 6774, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:15.969843", + "step": 6774, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.118032565107569e-05, + "timestamp": "2025-09-10 02:30:15.981573", + "step": 6775, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:16.014106", + "step": 6775, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.917355494806543e-05, + "timestamp": "2025-09-10 02:30:16.041935", + "step": 6776, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:16.072768", + "step": 6776, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.683034735033289e-05, + "timestamp": "2025-09-10 02:30:16.077988", + "step": 6777, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:16.110417", + "step": 6777, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003518997982610017, + "timestamp": "2025-09-10 02:30:16.117652", + "step": 6778, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:16.148684", + "step": 6778, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.0944421673193574e-05, + "timestamp": "2025-09-10 02:30:16.151125", + "step": 6779, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:16.182224", + "step": 6779, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011305516818538308, + "timestamp": "2025-09-10 02:30:16.210513", + "step": 6780, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:16.242733", + "step": 6780, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.004298292566090822, + "timestamp": "2025-09-10 02:30:16.251720", + "step": 6781, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:16.282589", + "step": 6781, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013062043581157923, + "timestamp": "2025-09-10 02:30:16.294634", + "step": 6782, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:16.329331", + "step": 6782, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.012010819278657436, + "timestamp": "2025-09-10 02:30:16.336729", + "step": 6783, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:16.367635", + "step": 6783, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013636215589940548, + "timestamp": "2025-09-10 02:30:16.399380", + "step": 6784, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:16.437640", + "step": 6784, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012389972107484937, + "timestamp": "2025-09-10 02:30:16.442370", + "step": 6785, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:16.475988", + "step": 6785, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001779487356543541, + "timestamp": "2025-09-10 02:30:16.483346", + "step": 6786, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:16.517989", + "step": 6786, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001430910429917276, + "timestamp": "2025-09-10 02:30:16.524922", + "step": 6787, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:16.558105", + "step": 6787, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.068051829468459e-05, + "timestamp": "2025-09-10 02:30:16.590893", + "step": 6788, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:30:16.627308", + "step": 6788, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024358855444006622, + "timestamp": "2025-09-10 02:30:16.640280", + "step": 6789, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:30:16.674026", + "step": 6789, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007300904835574329, + "timestamp": "2025-09-10 02:30:16.678254", + "step": 6790, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:30:16.716899", + "step": 6790, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.7813763153972104e-05, + "timestamp": "2025-09-10 02:30:16.719438", + "step": 6791, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:16.760030", + "step": 6791, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.391224400838837e-05, + "timestamp": "2025-09-10 02:30:16.788260", + "step": 6792, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:16.824861", + "step": 6792, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007873232243582606, + "timestamp": "2025-09-10 02:30:16.829373", + "step": 6793, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 512 + ], + "flops": 15187581968384 + }, + "timestamp": "2025-09-10 02:30:16.874610", + "step": 6793, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.269152749562636e-05, + "timestamp": "2025-09-10 02:30:16.892285", + "step": 6794, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:16.929849", + "step": 6794, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020102993585169315, + "timestamp": "2025-09-10 02:30:16.937347", + "step": 6795, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:16.970154", + "step": 6795, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012203119695186615, + "timestamp": "2025-09-10 02:30:16.997874", + "step": 6796, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:17.029699", + "step": 6796, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.010345556773245335, + "timestamp": "2025-09-10 02:30:17.039296", + "step": 6797, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:17.073566", + "step": 6797, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007398456335067749, + "timestamp": "2025-09-10 02:30:17.077519", + "step": 6798, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:30:17.119440", + "step": 6798, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006762798875570297, + "timestamp": "2025-09-10 02:30:17.135062", + "step": 6799, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:17.168051", + "step": 6799, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012070146476617083, + "timestamp": "2025-09-10 02:30:17.196027", + "step": 6800, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:17.230664", + "step": 6800, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.54533107788302e-05, + "timestamp": "2025-09-10 02:30:17.247016", + "step": 6801, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:30:17.285299", + "step": 6801, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.327310256892815e-05, + "timestamp": "2025-09-10 02:30:17.299295", + "step": 6802, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:17.331893", + "step": 6802, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.030736997723579407, + "timestamp": "2025-09-10 02:30:17.334233", + "step": 6803, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:30:17.369517", + "step": 6803, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001166960719274357, + "timestamp": "2025-09-10 02:30:17.402857", + "step": 6804, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:30:17.440516", + "step": 6804, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021152500994503498, + "timestamp": "2025-09-10 02:30:17.455582", + "step": 6805, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:17.489705", + "step": 6805, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016026229423005134, + "timestamp": "2025-09-10 02:30:17.492235", + "step": 6806, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:30:17.531358", + "step": 6806, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014069009339436889, + "timestamp": "2025-09-10 02:30:17.546926", + "step": 6807, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:17.581034", + "step": 6807, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012990307295694947, + "timestamp": "2025-09-10 02:30:17.609309", + "step": 6808, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:17.642719", + "step": 6808, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0327615961432457, + "timestamp": "2025-09-10 02:30:17.650909", + "step": 6809, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:17.682485", + "step": 6809, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003256215713918209, + "timestamp": "2025-09-10 02:30:17.689780", + "step": 6810, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:17.722498", + "step": 6810, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016400113236159086, + "timestamp": "2025-09-10 02:30:17.732387", + "step": 6811, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:17.765986", + "step": 6811, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.386579919606447e-05, + "timestamp": "2025-09-10 02:30:17.793719", + "step": 6812, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:17.829048", + "step": 6812, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.543731771875173e-05, + "timestamp": "2025-09-10 02:30:17.838282", + "step": 6813, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:17.871913", + "step": 6813, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010472764261066914, + "timestamp": "2025-09-10 02:30:17.882138", + "step": 6814, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:17.915235", + "step": 6814, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010902778012678027, + "timestamp": "2025-09-10 02:30:17.917728", + "step": 6815, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:17.951419", + "step": 6815, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002544302260503173, + "timestamp": "2025-09-10 02:30:17.976575", + "step": 6816, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:18.008867", + "step": 6816, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003031869127880782, + "timestamp": "2025-09-10 02:30:18.013772", + "step": 6817, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:18.049470", + "step": 6817, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00049980339827016, + "timestamp": "2025-09-10 02:30:18.060303", + "step": 6818, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:18.097031", + "step": 6818, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019091797003056854, + "timestamp": "2025-09-10 02:30:18.110426", + "step": 6819, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:18.143731", + "step": 6819, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006083925254642963, + "timestamp": "2025-09-10 02:30:18.168937", + "step": 6820, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:18.203331", + "step": 6820, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.496772114885971e-05, + "timestamp": "2025-09-10 02:30:18.216022", + "step": 6821, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:18.251835", + "step": 6821, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001554272894281894, + "timestamp": "2025-09-10 02:30:18.255935", + "step": 6822, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:18.291116", + "step": 6822, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005963979056105018, + "timestamp": "2025-09-10 02:30:18.301097", + "step": 6823, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:18.332866", + "step": 6823, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.008737495401874e-05, + "timestamp": "2025-09-10 02:30:18.364544", + "step": 6824, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:18.399060", + "step": 6824, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042097517871297896, + "timestamp": "2025-09-10 02:30:18.411561", + "step": 6825, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:18.448099", + "step": 6825, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.969293463043869e-05, + "timestamp": "2025-09-10 02:30:18.457796", + "step": 6826, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:18.493630", + "step": 6826, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0014862669631838799, + "timestamp": "2025-09-10 02:30:18.501335", + "step": 6827, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:18.535521", + "step": 6827, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00019058329053223133, + "timestamp": "2025-09-10 02:30:18.563006", + "step": 6828, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:18.601204", + "step": 6828, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002576792612671852, + "timestamp": "2025-09-10 02:30:18.605856", + "step": 6829, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:18.637920", + "step": 6829, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004352860269136727, + "timestamp": "2025-09-10 02:30:18.645472", + "step": 6830, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:18.681478", + "step": 6830, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.436298720771447e-05, + "timestamp": "2025-09-10 02:30:18.688357", + "step": 6831, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:18.724012", + "step": 6831, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022188770526554435, + "timestamp": "2025-09-10 02:30:18.749357", + "step": 6832, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:30:18.780158", + "step": 6832, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015023789601400495, + "timestamp": "2025-09-10 02:30:18.784655", + "step": 6833, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:18.820484", + "step": 6833, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012278666254132986, + "timestamp": "2025-09-10 02:30:18.832468", + "step": 6834, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:18.865085", + "step": 6834, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005471336655318737, + "timestamp": "2025-09-10 02:30:18.869075", + "step": 6835, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:18.901174", + "step": 6835, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007918172632344067, + "timestamp": "2025-09-10 02:30:18.929621", + "step": 6836, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:18.960867", + "step": 6836, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016386432980652899, + "timestamp": "2025-09-10 02:30:18.968220", + "step": 6837, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:18.998902", + "step": 6837, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00040015511331148446, + "timestamp": "2025-09-10 02:30:19.005819", + "step": 6838, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:19.039481", + "step": 6838, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003809529298450798, + "timestamp": "2025-09-10 02:30:19.050226", + "step": 6839, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:19.089016", + "step": 6839, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012751105532515794, + "timestamp": "2025-09-10 02:30:19.116968", + "step": 6840, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:19.154062", + "step": 6840, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002030668401857838, + "timestamp": "2025-09-10 02:30:19.156202", + "step": 6841, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:19.196680", + "step": 6841, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.849247387843207e-05, + "timestamp": "2025-09-10 02:30:19.207016", + "step": 6842, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:19.240281", + "step": 6842, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.702614675508812e-05, + "timestamp": "2025-09-10 02:30:19.242811", + "step": 6843, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:19.274075", + "step": 6843, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011114530934719369, + "timestamp": "2025-09-10 02:30:19.299505", + "step": 6844, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:19.330251", + "step": 6844, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00036023682332597673, + "timestamp": "2025-09-10 02:30:19.332826", + "step": 6845, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:19.363969", + "step": 6845, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022267237363848835, + "timestamp": "2025-09-10 02:30:19.374234", + "step": 6846, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:30:19.413588", + "step": 6846, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00026117305969819427, + "timestamp": "2025-09-10 02:30:19.426145", + "step": 6847, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:19.457215", + "step": 6847, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017910859605763108, + "timestamp": "2025-09-10 02:30:19.484847", + "step": 6848, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:19.518918", + "step": 6848, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025339677813462913, + "timestamp": "2025-09-10 02:30:19.528648", + "step": 6849, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:30:19.569357", + "step": 6849, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00041114582563750446, + "timestamp": "2025-09-10 02:30:19.585004", + "step": 6850, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:30:19.626859", + "step": 6850, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.002163324737921357, + "timestamp": "2025-09-10 02:30:19.640712", + "step": 6851, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:19.677256", + "step": 6851, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018701299268286675, + "timestamp": "2025-09-10 02:30:19.705133", + "step": 6852, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:19.738947", + "step": 6852, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005152305820956826, + "timestamp": "2025-09-10 02:30:19.751590", + "step": 6853, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:19.785229", + "step": 6853, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007716981228441, + "timestamp": "2025-09-10 02:30:19.792581", + "step": 6854, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:19.823461", + "step": 6854, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002965559542644769, + "timestamp": "2025-09-10 02:30:19.826056", + "step": 6855, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:19.862266", + "step": 6855, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017139650299213827, + "timestamp": "2025-09-10 02:30:19.893207", + "step": 6856, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:19.928880", + "step": 6856, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021169218234717846, + "timestamp": "2025-09-10 02:30:19.936746", + "step": 6857, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:19.976093", + "step": 6857, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011072350753238425, + "timestamp": "2025-09-10 02:30:19.989442", + "step": 6858, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:20.022347", + "step": 6858, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0019564726389944553, + "timestamp": "2025-09-10 02:30:20.029521", + "step": 6859, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:30:20.062143", + "step": 6859, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042336867772974074, + "timestamp": "2025-09-10 02:30:20.095512", + "step": 6860, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:20.133531", + "step": 6860, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017614095122553408, + "timestamp": "2025-09-10 02:30:20.138738", + "step": 6861, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:20.174233", + "step": 6861, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017980553093366325, + "timestamp": "2025-09-10 02:30:20.184889", + "step": 6862, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:20.222653", + "step": 6862, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.931239815661684e-05, + "timestamp": "2025-09-10 02:30:20.230060", + "step": 6863, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:30:20.260929", + "step": 6863, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.02051333151757717, + "timestamp": "2025-09-10 02:30:20.285056", + "step": 6864, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:30:20.325514", + "step": 6864, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.088215690804645e-05, + "timestamp": "2025-09-10 02:30:20.338810", + "step": 6865, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:20.372656", + "step": 6865, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001564481353852898, + "timestamp": "2025-09-10 02:30:20.379373", + "step": 6866, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:30:20.414627", + "step": 6866, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010649115574778989, + "timestamp": "2025-09-10 02:30:20.427159", + "step": 6867, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:20.457949", + "step": 6867, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001269724016310647, + "timestamp": "2025-09-10 02:30:20.486703", + "step": 6868, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:20.524304", + "step": 6868, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010912258876487613, + "timestamp": "2025-09-10 02:30:20.529129", + "step": 6869, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:20.560997", + "step": 6869, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.636014874558896e-05, + "timestamp": "2025-09-10 02:30:20.567548", + "step": 6870, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:30:20.602864", + "step": 6870, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000113781621621456, + "timestamp": "2025-09-10 02:30:20.616611", + "step": 6871, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:20.654164", + "step": 6871, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.113257692661136e-05, + "timestamp": "2025-09-10 02:30:20.685367", + "step": 6872, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:20.715516", + "step": 6872, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006366458837874234, + "timestamp": "2025-09-10 02:30:20.720287", + "step": 6873, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:20.751250", + "step": 6873, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002216670400230214, + "timestamp": "2025-09-10 02:30:20.758358", + "step": 6874, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:20.797148", + "step": 6874, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001820830802898854, + "timestamp": "2025-09-10 02:30:20.809371", + "step": 6875, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:20.848562", + "step": 6875, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.005944172386080027, + "timestamp": "2025-09-10 02:30:20.876414", + "step": 6876, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:20.909153", + "step": 6876, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.662976036546752e-05, + "timestamp": "2025-09-10 02:30:20.911434", + "step": 6877, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:30:20.951774", + "step": 6877, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00043625704711303115, + "timestamp": "2025-09-10 02:30:20.965576", + "step": 6878, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:21.004372", + "step": 6878, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010746198677225038, + "timestamp": "2025-09-10 02:30:21.012127", + "step": 6879, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:21.044248", + "step": 6879, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00030602794140577316, + "timestamp": "2025-09-10 02:30:21.075223", + "step": 6880, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:21.114259", + "step": 6880, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00037156790494918823, + "timestamp": "2025-09-10 02:30:21.121592", + "step": 6881, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:21.152891", + "step": 6881, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018727740098256618, + "timestamp": "2025-09-10 02:30:21.163096", + "step": 6882, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:21.198073", + "step": 6882, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.839085396379232e-05, + "timestamp": "2025-09-10 02:30:21.202551", + "step": 6883, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:21.233647", + "step": 6883, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000886984693352133, + "timestamp": "2025-09-10 02:30:21.261268", + "step": 6884, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:21.294025", + "step": 6884, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016462511848658323, + "timestamp": "2025-09-10 02:30:21.301922", + "step": 6885, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:21.333917", + "step": 6885, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0043139709159731865, + "timestamp": "2025-09-10 02:30:21.340952", + "step": 6886, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:21.380516", + "step": 6886, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008590264478698373, + "timestamp": "2025-09-10 02:30:21.387960", + "step": 6887, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:21.424227", + "step": 6887, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0003677209315355867, + "timestamp": "2025-09-10 02:30:21.452110", + "step": 6888, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:21.482477", + "step": 6888, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.307049185736105e-05, + "timestamp": "2025-09-10 02:30:21.491069", + "step": 6889, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:30:21.521828", + "step": 6889, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.364223322132602e-05, + "timestamp": "2025-09-10 02:30:21.534406", + "step": 6890, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:21.570779", + "step": 6890, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047637257375754416, + "timestamp": "2025-09-10 02:30:21.574792", + "step": 6891, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:21.607469", + "step": 6891, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0028176165651530027, + "timestamp": "2025-09-10 02:30:21.632477", + "step": 6892, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:21.697379", + "step": 6892, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002470030449330807, + "timestamp": "2025-09-10 02:30:21.699510", + "step": 6893, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:21.735128", + "step": 6893, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006550102843903005, + "timestamp": "2025-09-10 02:30:21.748541", + "step": 6894, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:21.789661", + "step": 6894, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011021040700143203, + "timestamp": "2025-09-10 02:30:21.796606", + "step": 6895, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:21.831675", + "step": 6895, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010865674121305346, + "timestamp": "2025-09-10 02:30:21.860184", + "step": 6896, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:21.893488", + "step": 6896, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001547595311421901, + "timestamp": "2025-09-10 02:30:21.896158", + "step": 6897, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:21.926703", + "step": 6897, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014410761650651693, + "timestamp": "2025-09-10 02:30:21.937151", + "step": 6898, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:21.967455", + "step": 6898, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005947855534031987, + "timestamp": "2025-09-10 02:30:21.971499", + "step": 6899, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:22.002647", + "step": 6899, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00025538000045344234, + "timestamp": "2025-09-10 02:30:22.027939", + "step": 6900, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:30:22.060817", + "step": 6900, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005543892038986087, + "timestamp": "2025-09-10 02:30:22.073926", + "step": 6901, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:22.114769", + "step": 6901, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0021151488181203604, + "timestamp": "2025-09-10 02:30:22.121566", + "step": 6902, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:22.162625", + "step": 6902, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011541605636011809, + "timestamp": "2025-09-10 02:30:22.166933", + "step": 6903, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:22.197743", + "step": 6903, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013175479834899306, + "timestamp": "2025-09-10 02:30:22.226283", + "step": 6904, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:30:22.281595", + "step": 6904, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.248906432418153e-05, + "timestamp": "2025-09-10 02:30:22.298306", + "step": 6905, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:22.328426", + "step": 6905, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.818878369405866e-05, + "timestamp": "2025-09-10 02:30:22.330887", + "step": 6906, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:22.361346", + "step": 6906, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005144443712197244, + "timestamp": "2025-09-10 02:30:22.368298", + "step": 6907, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:30:22.400777", + "step": 6907, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.7616198571631685e-05, + "timestamp": "2025-09-10 02:30:22.434318", + "step": 6908, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:22.469807", + "step": 6908, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013233958452474326, + "timestamp": "2025-09-10 02:30:22.474890", + "step": 6909, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:30:32.571318", + "step": 6909, + "epoch": 3 + }, + { + "type": "pplx", + "content": 24853930.15520345, + "timestamp": "2025-09-10 02:30:32.574149", + "step": 6909, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:32.608153", + "step": 6909, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.375996887683868e-05, + "timestamp": "2025-09-10 02:30:32.616912", + "step": 6910, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:32.650941", + "step": 6910, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002628415822982788, + "timestamp": "2025-09-10 02:30:32.654724", + "step": 6911, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:32.687238", + "step": 6911, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002938093966804445, + "timestamp": "2025-09-10 02:30:32.711218", + "step": 6912, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:30:32.742241", + "step": 6912, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008156453259289265, + "timestamp": "2025-09-10 02:30:32.744693", + "step": 6913, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:32.775489", + "step": 6913, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015855650417506695, + "timestamp": "2025-09-10 02:30:32.779710", + "step": 6914, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:32.811619", + "step": 6914, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.417724216589704e-05, + "timestamp": "2025-09-10 02:30:32.815751", + "step": 6915, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:32.847821", + "step": 6915, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011772527359426022, + "timestamp": "2025-09-10 02:30:32.875766", + "step": 6916, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:32.908515", + "step": 6916, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0008328754338435829, + "timestamp": "2025-09-10 02:30:32.912764", + "step": 6917, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:32.944174", + "step": 6917, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012226430408190936, + "timestamp": "2025-09-10 02:30:32.948156", + "step": 6918, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:32.979169", + "step": 6918, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023380214406643063, + "timestamp": "2025-09-10 02:30:32.985922", + "step": 6919, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:33.017286", + "step": 6919, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002332628209842369, + "timestamp": "2025-09-10 02:30:33.049690", + "step": 6920, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:33.081821", + "step": 6920, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000248458469286561, + "timestamp": "2025-09-10 02:30:33.086063", + "step": 6921, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:33.117484", + "step": 6921, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023750065884087235, + "timestamp": "2025-09-10 02:30:33.127207", + "step": 6922, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:33.158861", + "step": 6922, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011210433149244636, + "timestamp": "2025-09-10 02:30:33.165640", + "step": 6923, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:33.198408", + "step": 6923, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017960583500098437, + "timestamp": "2025-09-10 02:30:33.226801", + "step": 6924, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:33.259757", + "step": 6924, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010823294724104926, + "timestamp": "2025-09-10 02:30:33.264770", + "step": 6925, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:33.298052", + "step": 6925, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.001127683324739337, + "timestamp": "2025-09-10 02:30:33.304551", + "step": 6926, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:33.347085", + "step": 6926, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002121505531249568, + "timestamp": "2025-09-10 02:30:33.353818", + "step": 6927, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:33.387708", + "step": 6927, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00015519419685006142, + "timestamp": "2025-09-10 02:30:33.415510", + "step": 6928, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:33.458245", + "step": 6928, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001299724681302905, + "timestamp": "2025-09-10 02:30:33.466105", + "step": 6929, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:33.500059", + "step": 6929, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001691354700597003, + "timestamp": "2025-09-10 02:30:33.507491", + "step": 6930, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:33.539099", + "step": 6930, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.918951789382845e-05, + "timestamp": "2025-09-10 02:30:33.546449", + "step": 6931, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:33.579426", + "step": 6931, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011090342741226777, + "timestamp": "2025-09-10 02:30:33.604561", + "step": 6932, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:33.636469", + "step": 6932, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00038635358214378357, + "timestamp": "2025-09-10 02:30:33.640763", + "step": 6933, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:33.675256", + "step": 6933, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012322509428486228, + "timestamp": "2025-09-10 02:30:33.686697", + "step": 6934, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:33.719036", + "step": 6934, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0012386480811983347, + "timestamp": "2025-09-10 02:30:33.726134", + "step": 6935, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:33.758568", + "step": 6935, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.629237107816152e-05, + "timestamp": "2025-09-10 02:30:33.786432", + "step": 6936, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:33.819712", + "step": 6936, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.105556142050773e-05, + "timestamp": "2025-09-10 02:30:33.823893", + "step": 6937, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:33.859795", + "step": 6937, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.639743802836165e-05, + "timestamp": "2025-09-10 02:30:33.869595", + "step": 6938, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:33.904854", + "step": 6938, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.235136243049055e-05, + "timestamp": "2025-09-10 02:30:33.914782", + "step": 6939, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:33.947402", + "step": 6939, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002960147976409644, + "timestamp": "2025-09-10 02:30:33.979908", + "step": 6940, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:34.012860", + "step": 6940, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.007601078599691391, + "timestamp": "2025-09-10 02:30:34.024405", + "step": 6941, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:34.057014", + "step": 6941, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.58233337465208e-05, + "timestamp": "2025-09-10 02:30:34.063787", + "step": 6942, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:30:34.100482", + "step": 6942, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0015243064844980836, + "timestamp": "2025-09-10 02:30:34.103494", + "step": 6943, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:34.138373", + "step": 6943, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00024156909785233438, + "timestamp": "2025-09-10 02:30:34.165987", + "step": 6944, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:30:34.216927", + "step": 6944, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00428308779373765, + "timestamp": "2025-09-10 02:30:34.233895", + "step": 6945, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:34.272835", + "step": 6945, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.029404859989881516, + "timestamp": "2025-09-10 02:30:34.282825", + "step": 6946, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:34.329763", + "step": 6946, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013546801346819848, + "timestamp": "2025-09-10 02:30:34.337397", + "step": 6947, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:34.375921", + "step": 6947, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.598109833546914e-05, + "timestamp": "2025-09-10 02:30:34.404145", + "step": 6948, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:34.439246", + "step": 6948, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020628688798751682, + "timestamp": "2025-09-10 02:30:34.442744", + "step": 6949, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:30:34.487136", + "step": 6949, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010723454033723101, + "timestamp": "2025-09-10 02:30:34.503012", + "step": 6950, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:34.539197", + "step": 6950, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011623270984273404, + "timestamp": "2025-09-10 02:30:34.547880", + "step": 6951, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:34.586654", + "step": 6951, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001242592406924814, + "timestamp": "2025-09-10 02:30:34.611887", + "step": 6952, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:34.643970", + "step": 6952, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004370961687527597, + "timestamp": "2025-09-10 02:30:34.649214", + "step": 6953, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:34.690287", + "step": 6953, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002730258565861732, + "timestamp": "2025-09-10 02:30:34.700991", + "step": 6954, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:34.736851", + "step": 6954, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011180860747117549, + "timestamp": "2025-09-10 02:30:34.744122", + "step": 6955, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:34.783004", + "step": 6955, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004824143834412098, + "timestamp": "2025-09-10 02:30:34.817426", + "step": 6956, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:30:34.849646", + "step": 6956, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00022752817312721163, + "timestamp": "2025-09-10 02:30:34.851825", + "step": 6957, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:34.883349", + "step": 6957, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0062463474459946156, + "timestamp": "2025-09-10 02:30:34.890202", + "step": 6958, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:34.922061", + "step": 6958, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.5043336033122614e-05, + "timestamp": "2025-09-10 02:30:34.928820", + "step": 6959, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 496 + ], + "flops": 14712978242368 + }, + "timestamp": "2025-09-10 02:30:34.972076", + "step": 6959, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005719130276702344, + "timestamp": "2025-09-10 02:30:35.010467", + "step": 6960, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:35.043747", + "step": 6960, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00044621675624512136, + "timestamp": "2025-09-10 02:30:35.050415", + "step": 6961, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:35.084469", + "step": 6961, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034819470602087677, + "timestamp": "2025-09-10 02:30:35.094215", + "step": 6962, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:35.126745", + "step": 6962, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016455540026072413, + "timestamp": "2025-09-10 02:30:35.133099", + "step": 6963, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:30:35.164791", + "step": 6963, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.0337421271251515e-05, + "timestamp": "2025-09-10 02:30:35.188773", + "step": 6964, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:35.221492", + "step": 6964, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00010189624299528077, + "timestamp": "2025-09-10 02:30:35.226632", + "step": 6965, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 416 + ], + "flops": 12339959612288 + }, + "timestamp": "2025-09-10 02:30:35.264821", + "step": 6965, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.955454667448066e-05, + "timestamp": "2025-09-10 02:30:35.280698", + "step": 6966, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:35.314438", + "step": 6966, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0009393363143317401, + "timestamp": "2025-09-10 02:30:35.324855", + "step": 6967, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:35.356816", + "step": 6967, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.750239193323068e-05, + "timestamp": "2025-09-10 02:30:35.384330", + "step": 6968, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:35.418787", + "step": 6968, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.956552872201428e-05, + "timestamp": "2025-09-10 02:30:35.423625", + "step": 6969, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:35.459261", + "step": 6969, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006180386990308762, + "timestamp": "2025-09-10 02:30:35.466933", + "step": 6970, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:30:35.502471", + "step": 6970, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00042301107896491885, + "timestamp": "2025-09-10 02:30:35.516228", + "step": 6971, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:35.547668", + "step": 6971, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011797425395343453, + "timestamp": "2025-09-10 02:30:35.575234", + "step": 6972, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:35.607400", + "step": 6972, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.7395021131960675e-05, + "timestamp": "2025-09-10 02:30:35.612305", + "step": 6973, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:35.643800", + "step": 6973, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.854817962041125e-05, + "timestamp": "2025-09-10 02:30:35.650850", + "step": 6974, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:35.681964", + "step": 6974, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004031723365187645, + "timestamp": "2025-09-10 02:30:35.692077", + "step": 6975, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:35.725419", + "step": 6975, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.026183495298027992, + "timestamp": "2025-09-10 02:30:35.749986", + "step": 6976, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:30:35.787096", + "step": 6976, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.9930823327740654e-05, + "timestamp": "2025-09-10 02:30:35.802258", + "step": 6977, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:35.835958", + "step": 6977, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.449880387051962e-05, + "timestamp": "2025-09-10 02:30:35.842859", + "step": 6978, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:35.875898", + "step": 6978, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.551814633188769e-05, + "timestamp": "2025-09-10 02:30:35.886516", + "step": 6979, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:35.920589", + "step": 6979, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020103261340409517, + "timestamp": "2025-09-10 02:30:35.954756", + "step": 6980, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:35.987128", + "step": 6980, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001782312901923433, + "timestamp": "2025-09-10 02:30:35.996757", + "step": 6981, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:36.032245", + "step": 6981, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.386146222008392e-05, + "timestamp": "2025-09-10 02:30:36.039500", + "step": 6982, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:36.071475", + "step": 6982, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.014369412325322628, + "timestamp": "2025-09-10 02:30:36.083000", + "step": 6983, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:36.114751", + "step": 6983, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018798027304001153, + "timestamp": "2025-09-10 02:30:36.142496", + "step": 6984, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:36.174099", + "step": 6984, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00023625533503945917, + "timestamp": "2025-09-10 02:30:36.186822", + "step": 6985, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:30:36.218572", + "step": 6985, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.370379509869963e-05, + "timestamp": "2025-09-10 02:30:36.231242", + "step": 6986, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:36.262615", + "step": 6986, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004739656869787723, + "timestamp": "2025-09-10 02:30:36.269571", + "step": 6987, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:30:36.300097", + "step": 6987, + "epoch": 3 + }, + { + "type": "loss", + "content": 1.8654181985766627e-05, + "timestamp": "2025-09-10 02:30:36.323594", + "step": 6988, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:36.353988", + "step": 6988, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.558034121757373e-05, + "timestamp": "2025-09-10 02:30:36.356105", + "step": 6989, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:36.387777", + "step": 6989, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002668748202268034, + "timestamp": "2025-09-10 02:30:36.394720", + "step": 6990, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:36.425215", + "step": 6990, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021226401440799236, + "timestamp": "2025-09-10 02:30:36.432234", + "step": 6991, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:36.463874", + "step": 6991, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00016221609257627279, + "timestamp": "2025-09-10 02:30:36.492197", + "step": 6992, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:36.522883", + "step": 6992, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.354503370355815e-05, + "timestamp": "2025-09-10 02:30:36.528161", + "step": 6993, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:36.559418", + "step": 6993, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.1902184016653337e-05, + "timestamp": "2025-09-10 02:30:36.569482", + "step": 6994, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:36.602148", + "step": 6994, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011977553367614746, + "timestamp": "2025-09-10 02:30:36.608742", + "step": 6995, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:30:36.645587", + "step": 6995, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002517557586543262, + "timestamp": "2025-09-10 02:30:36.680558", + "step": 6996, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:30:36.714025", + "step": 6996, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.101787145482376e-05, + "timestamp": "2025-09-10 02:30:36.727174", + "step": 6997, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:36.758526", + "step": 6997, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001227896282216534, + "timestamp": "2025-09-10 02:30:36.762573", + "step": 6998, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 368 + ], + "flops": 10916148434240 + }, + "timestamp": "2025-09-10 02:30:36.797864", + "step": 6998, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.544071559095755e-05, + "timestamp": "2025-09-10 02:30:36.811692", + "step": 6999, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:36.844402", + "step": 6999, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.003692924277856946, + "timestamp": "2025-09-10 02:30:36.872282", + "step": 7000, + "epoch": 3 + }, + { + "type": "info", + "content": "Checkpoint saved at step 7000", + "timestamp": "2025-09-10 02:30:42.318498", + "step": 7000, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:42.362451", + "step": 7000, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.54517168388702e-05, + "timestamp": "2025-09-10 02:30:42.366545", + "step": 7001, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:42.398762", + "step": 7001, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.800796447554603e-05, + "timestamp": "2025-09-10 02:30:42.402311", + "step": 7002, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:42.434025", + "step": 7002, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.029476981610059738, + "timestamp": "2025-09-10 02:30:42.440768", + "step": 7003, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:42.472458", + "step": 7003, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001302637974731624, + "timestamp": "2025-09-10 02:30:42.496887", + "step": 7004, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:30:42.530135", + "step": 7004, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011870044283568859, + "timestamp": "2025-09-10 02:30:42.540086", + "step": 7005, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:42.573458", + "step": 7005, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021210841077845544, + "timestamp": "2025-09-10 02:30:42.580218", + "step": 7006, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:42.612231", + "step": 7006, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.902814882574603e-05, + "timestamp": "2025-09-10 02:30:42.619093", + "step": 7007, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:42.652006", + "step": 7007, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0002512595965526998, + "timestamp": "2025-09-10 02:30:42.682861", + "step": 7008, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:42.718324", + "step": 7008, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005681976908817887, + "timestamp": "2025-09-10 02:30:42.726159", + "step": 7009, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:42.761462", + "step": 7009, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010124502005055547, + "timestamp": "2025-09-10 02:30:42.766074", + "step": 7010, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 144 + ], + "flops": 4271696270016 + }, + "timestamp": "2025-09-10 02:30:42.797001", + "step": 7010, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010337395360693336, + "timestamp": "2025-09-10 02:30:42.799307", + "step": 7011, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:42.831691", + "step": 7011, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.000359332247171551, + "timestamp": "2025-09-10 02:30:42.860163", + "step": 7012, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:42.892069", + "step": 7012, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001721430744510144, + "timestamp": "2025-09-10 02:30:42.896594", + "step": 7013, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:42.928749", + "step": 7013, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001188502719742246, + "timestamp": "2025-09-10 02:30:42.939898", + "step": 7014, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:42.975516", + "step": 7014, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013335456606000662, + "timestamp": "2025-09-10 02:30:42.979666", + "step": 7015, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:43.012243", + "step": 7015, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.617595110787079e-05, + "timestamp": "2025-09-10 02:30:43.036918", + "step": 7016, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:43.071708", + "step": 7016, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004477399925235659, + "timestamp": "2025-09-10 02:30:43.080414", + "step": 7017, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:43.120632", + "step": 7017, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.525385055691004e-05, + "timestamp": "2025-09-10 02:30:43.134005", + "step": 7018, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:43.167311", + "step": 7018, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006595923332497478, + "timestamp": "2025-09-10 02:30:43.174667", + "step": 7019, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 400 + ], + "flops": 11865355886272 + }, + "timestamp": "2025-09-10 02:30:43.214268", + "step": 7019, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.695385359809734e-05, + "timestamp": "2025-09-10 02:30:43.250768", + "step": 7020, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:43.281538", + "step": 7020, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017301096522714943, + "timestamp": "2025-09-10 02:30:43.299985", + "step": 7021, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 480 + ], + "flops": 14238374516352 + }, + "timestamp": "2025-09-10 02:30:43.357493", + "step": 7021, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001524223480373621, + "timestamp": "2025-09-10 02:30:43.374840", + "step": 7022, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:43.405945", + "step": 7022, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00013412600674200803, + "timestamp": "2025-09-10 02:30:43.413011", + "step": 7023, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:43.448479", + "step": 7023, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017144733283203095, + "timestamp": "2025-09-10 02:30:43.473773", + "step": 7024, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:43.510354", + "step": 7024, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034450864768587053, + "timestamp": "2025-09-10 02:30:43.512714", + "step": 7025, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:43.544358", + "step": 7025, + "epoch": 3 + }, + { + "type": "loss", + "content": 6.39140052953735e-05, + "timestamp": "2025-09-10 02:30:43.551274", + "step": 7026, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:43.584415", + "step": 7026, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0004727788909804076, + "timestamp": "2025-09-10 02:30:43.591039", + "step": 7027, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:43.622903", + "step": 7027, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0025320053100585938, + "timestamp": "2025-09-10 02:30:43.654789", + "step": 7028, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:43.690290", + "step": 7028, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012714836339000612, + "timestamp": "2025-09-10 02:30:43.697951", + "step": 7029, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:43.733691", + "step": 7029, + "epoch": 3 + }, + { + "type": "loss", + "content": 8.670837996760383e-05, + "timestamp": "2025-09-10 02:30:43.740638", + "step": 7030, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 128 + ], + "flops": 3797092544000 + }, + "timestamp": "2025-09-10 02:30:43.773773", + "step": 7030, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.011012664996087551, + "timestamp": "2025-09-10 02:30:43.778300", + "step": 7031, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:43.811604", + "step": 7031, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00020046999270562083, + "timestamp": "2025-09-10 02:30:43.836388", + "step": 7032, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 320 + ], + "flops": 9492337256192 + }, + "timestamp": "2025-09-10 02:30:43.869027", + "step": 7032, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012668267299886793, + "timestamp": "2025-09-10 02:30:43.878628", + "step": 7033, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 160 + ], + "flops": 4746299996032 + }, + "timestamp": "2025-09-10 02:30:43.910975", + "step": 7033, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.517674799193628e-05, + "timestamp": "2025-09-10 02:30:43.913744", + "step": 7034, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:43.947657", + "step": 7034, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005423200782388449, + "timestamp": "2025-09-10 02:30:43.954538", + "step": 7035, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:30:43.990008", + "step": 7035, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0030603199265897274, + "timestamp": "2025-09-10 02:30:44.024506", + "step": 7036, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:30:44.062450", + "step": 7036, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.9157235227758065e-05, + "timestamp": "2025-09-10 02:30:44.075484", + "step": 7037, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:44.117288", + "step": 7037, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.580377597245388e-05, + "timestamp": "2025-09-10 02:30:44.124431", + "step": 7038, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:44.157272", + "step": 7038, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00011388809798518196, + "timestamp": "2025-09-10 02:30:44.168210", + "step": 7039, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:44.200882", + "step": 7039, + "epoch": 3 + }, + { + "type": "loss", + "content": 7.329420623136684e-05, + "timestamp": "2025-09-10 02:30:44.232066", + "step": 7040, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 384 + ], + "flops": 11390752160256 + }, + "timestamp": "2025-09-10 02:30:44.270763", + "step": 7040, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.624590165913105e-05, + "timestamp": "2025-09-10 02:30:44.284081", + "step": 7041, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:30:44.334268", + "step": 7041, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00018388082389719784, + "timestamp": "2025-09-10 02:30:44.351364", + "step": 7042, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:44.383125", + "step": 7042, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.913156640715897e-05, + "timestamp": "2025-09-10 02:30:44.387484", + "step": 7043, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:44.419803", + "step": 7043, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005409237928688526, + "timestamp": "2025-09-10 02:30:44.445221", + "step": 7044, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:44.480208", + "step": 7044, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.076716918963939e-05, + "timestamp": "2025-09-10 02:30:44.492853", + "step": 7045, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 272 + ], + "flops": 8068526078144 + }, + "timestamp": "2025-09-10 02:30:44.528299", + "step": 7045, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00021151323744561523, + "timestamp": "2025-09-10 02:30:44.538395", + "step": 7046, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:44.573351", + "step": 7046, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.7153665946098045e-05, + "timestamp": "2025-09-10 02:30:44.580768", + "step": 7047, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 304 + ], + "flops": 9017733530176 + }, + "timestamp": "2025-09-10 02:30:44.616096", + "step": 7047, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0007388163357973099, + "timestamp": "2025-09-10 02:30:44.649270", + "step": 7048, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:44.684828", + "step": 7048, + "epoch": 3 + }, + { + "type": "loss", + "content": 9.87757885013707e-05, + "timestamp": "2025-09-10 02:30:44.688511", + "step": 7049, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:44.720590", + "step": 7049, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.2101851906627417e-05, + "timestamp": "2025-09-10 02:30:44.728049", + "step": 7050, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 208 + ], + "flops": 6170111174080 + }, + "timestamp": "2025-09-10 02:30:44.760527", + "step": 7050, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.3754731450462714e-05, + "timestamp": "2025-09-10 02:30:44.767356", + "step": 7051, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 464 + ], + "flops": 13763770790336 + }, + "timestamp": "2025-09-10 02:30:44.810758", + "step": 7051, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0001889723789645359, + "timestamp": "2025-09-10 02:30:44.848646", + "step": 7052, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 224 + ], + "flops": 6644714900096 + }, + "timestamp": "2025-09-10 02:30:44.884716", + "step": 7052, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00012450621579773724, + "timestamp": "2025-09-10 02:30:44.889308", + "step": 7053, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:44.920683", + "step": 7053, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00014559333794750273, + "timestamp": "2025-09-10 02:30:44.928109", + "step": 7054, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 336 + ], + "flops": 9966940982208 + }, + "timestamp": "2025-09-10 02:30:44.964282", + "step": 7054, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017697580915410072, + "timestamp": "2025-09-10 02:30:44.977610", + "step": 7055, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 448 + ], + "flops": 13289167064320 + }, + "timestamp": "2025-09-10 02:30:45.023083", + "step": 7055, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00017205542826559395, + "timestamp": "2025-09-10 02:30:45.060332", + "step": 7056, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:30:55.373116", + "step": 7056, + "epoch": 3 + }, + { + "type": "pplx", + "content": 26144192.90405417, + "timestamp": "2025-09-10 02:30:55.376345", + "step": 7056, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:55.406505", + "step": 7056, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0005065679433755577, + "timestamp": "2025-09-10 02:30:55.410743", + "step": 7057, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:55.441447", + "step": 7057, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0013099844800308347, + "timestamp": "2025-09-10 02:30:55.449030", + "step": 7058, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 256 + ], + "flops": 7593922352128 + }, + "timestamp": "2025-09-10 02:30:55.481859", + "step": 7058, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.810837370110676e-05, + "timestamp": "2025-09-10 02:30:55.489460", + "step": 7059, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:55.521106", + "step": 7059, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0017356050666421652, + "timestamp": "2025-09-10 02:30:55.549027", + "step": 7060, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:55.580238", + "step": 7060, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00034908336238004267, + "timestamp": "2025-09-10 02:30:55.585395", + "step": 7061, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:55.616476", + "step": 7061, + "epoch": 3 + }, + { + "type": "loss", + "content": 5.2377414249349385e-05, + "timestamp": "2025-09-10 02:30:55.627386", + "step": 7062, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 176 + ], + "flops": 5220903722048 + }, + "timestamp": "2025-09-10 02:30:55.657587", + "step": 7062, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0034754632506519556, + "timestamp": "2025-09-10 02:30:55.661652", + "step": 7063, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 352 + ], + "flops": 10441544708224 + }, + "timestamp": "2025-09-10 02:30:55.696164", + "step": 7063, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0006347990711219609, + "timestamp": "2025-09-10 02:30:55.730783", + "step": 7064, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:55.763253", + "step": 7064, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.8621186579111964e-05, + "timestamp": "2025-09-10 02:30:55.768247", + "step": 7065, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 656 + ], + "flops": 19459015502528 + }, + "timestamp": "2025-09-10 02:30:55.823128", + "step": 7065, + "epoch": 3 + }, + { + "type": "loss", + "content": 3.0243045330280438e-05, + "timestamp": "2025-09-10 02:30:55.846549", + "step": 7066, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 288 + ], + "flops": 8543129804160 + }, + "timestamp": "2025-09-10 02:30:55.877340", + "step": 7066, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.00047078271745704114, + "timestamp": "2025-09-10 02:30:55.888271", + "step": 7067, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 240 + ], + "flops": 7119318626112 + }, + "timestamp": "2025-09-10 02:30:55.919101", + "step": 7067, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.756170528708026e-05, + "timestamp": "2025-09-10 02:30:55.947586", + "step": 7068, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 192 + ], + "flops": 5695507448064 + }, + "timestamp": "2025-09-10 02:30:55.978287", + "step": 7068, + "epoch": 3 + }, + { + "type": "loss", + "content": 0.0010376720456406474, + "timestamp": "2025-09-10 02:30:55.980530", + "step": 7069, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 4, + 528 + ], + "flops": 15662185694400 + }, + "timestamp": "2025-09-10 02:30:56.027381", + "step": 7069, + "epoch": 3 + }, + { + "type": "loss", + "content": 2.100633537338581e-05, + "timestamp": "2025-09-10 02:30:56.046434", + "step": 7070, + "epoch": 3 + }, + { + "type": "flops", + "content": { + "type": "train", + "batch_dim": [ + 3, + 224 + ], + "flops": 4983601869792 + }, + "timestamp": "2025-09-10 02:30:56.078505", + "step": 7070, + "epoch": 3 + }, + { + "type": "loss", + "content": 4.26122423959896e-05, + "timestamp": "2025-09-10 02:30:56.081611", + "step": 7071, + "epoch": 3 + }, + { + "type": "flops", + "content": [ + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 736 + ], + "batch_size": 8, + "flops": 14554433988352 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 480 + ], + "batch_size": 8, + "flops": 9492022189824 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 624 + ], + "batch_size": 8, + "flops": 12339628826496 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 432 + ], + "batch_size": 8, + "flops": 8542819977600 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 544 + ], + "batch_size": 8, + "flops": 10757625139456 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 656 + ], + "batch_size": 8, + "flops": 12972430301312 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 128 + ], + "batch_size": 8, + "flops": 2531205966848 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 496 + ], + "batch_size": 8, + "flops": 9808422927232 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 416 + ], + "batch_size": 8, + "flops": 8226419240192 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 400 + ], + "batch_size": 8, + "flops": 7910018502784 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 352 + ], + "batch_size": 8, + "flops": 6960816290560 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 288 + ], + "batch_size": 8, + "flops": 5695213340928 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 144 + ], + "batch_size": 8, + "flops": 2847606704256 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 464 + ], + "batch_size": 8, + "flops": 9175621452416 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 448 + ], + "batch_size": 8, + "flops": 8859220715008 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 560 + ], + "batch_size": 8, + "flops": 11074025876864 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 320 + ], + "batch_size": 8, + "flops": 6328014815744 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 304 + ], + "batch_size": 8, + "flops": 6011614078336 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 576 + ], + "batch_size": 8, + "flops": 11390426614272 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 368 + ], + "batch_size": 8, + "flops": 7277217027968 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 1168 + ], + "batch_size": 8, + "flops": 23097253898368 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 256 + ], + "batch_size": 8, + "flops": 5062411866112 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 160 + ], + "batch_size": 8, + "flops": 3164007441664 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 240 + ], + "batch_size": 8, + "flops": 4746011128704 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 640 + ], + "batch_size": 8, + "flops": 12656029563904 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 272 + ], + "batch_size": 8, + "flops": 5378812603520 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 224 + ], + "batch_size": 8, + "flops": 4429610391296 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 384 + ], + "batch_size": 8, + "flops": 7593617765376 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 192 + ], + "batch_size": 8, + "flops": 3796808916480 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 336 + ], + "batch_size": 8, + "flops": 6644415553152 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 8, + 176 + ], + "batch_size": 8, + "flops": 3480408179072 + }, + { + "type": "perplexity", + "in_batch_dim": [ + 6, + 208 + ], + "batch_size": 8, + "flops": 4113209653888 + } + ], + "timestamp": "2025-09-10 02:31:06.337196", + "step": 7071, + "epoch": 3 + }, + { + "type": "pplx", + "content": 26398362.306229845, + "timestamp": "2025-09-10 02:31:06.344312", + "step": 7071, + "epoch": 3 + }, + { + "type": "best_pplx", + "content": 12191892.104022551, + "timestamp": "2025-09-10 02:31:06.346146", + "step": 7071, + "epoch": 3 + }, + { + "type": "best_step", + "content": 147, + "timestamp": "2025-09-10 02:31:06.347858", + "step": 7071, + "epoch": 3 + }, + { + "type": "total_pplx_flops", + "content": 105693667713235200, + "timestamp": "2025-09-10 02:31:06.349825", + "step": 7071, + "epoch": 3 + }, + { + "type": "total_train_flops", + "content": 53674555878669600, + "timestamp": "2025-09-10 02:31:06.352004", + "step": 7071, + "epoch": 3 + } + ], + "best_evals": { + "pplx": { + "score": 12191892.104022551, + "step": 147 + }, + "rougel": { + "precision": 0.8507645259938837, + "recall": 0.8507645259938837, + "fmeasure": 0.8507645259938837 + } + } +} \ No newline at end of file